Command line
python /home/saxelrod/Repo/projects/chemprop/chemprop/train.py --config_path /home/saxelrod/synthetic/free_energy_ffn_fixed/train/config.json --data_path /home/saxelrod/synthetic/free_energy/train_full.csv --dataset_type regression
Args
{'activation': 'ReLU',
 'aggregation': 'mean',
 'aggregation_norm': 100,
 'atom_descriptors': None,
 'atom_descriptors_path': None,
 'atom_descriptors_size': 0,
 'atom_features_size': 0,
 'atom_messages': False,
 'batch_size': 50,
 'bias': False,
 'cache_cutoff': 10000,
 'checkpoint_dir': None,
 'checkpoint_path': None,
 'checkpoint_paths': None,
 'class_balance': False,
 'config_path': '/home/saxelrod/synthetic/free_energy_ffn_fixed/train/config.json',
 'crossval_index_dir': None,
 'crossval_index_file': None,
 'crossval_index_sets': None,
 'cuda': True,
 'data_path': '/home/saxelrod/synthetic/free_energy/train_full.csv',
 'dataset_type': 'regression',
 'depth': 4,
 'device': device(type='cuda', index=1),
 'dropout': 0.0,
 'ensemble_size': 1,
 'epochs': 30,
 'extra_metrics': [],
 'features_generator': ['morgan'],
 'features_only': True,
 'features_path': None,
 'features_scaling': False,
 'features_size': None,
 'ffn_hidden_size': 2200,
 'ffn_num_layers': 3,
 'final_lr': 0.0001,
 'folds_file': None,
 'gpu': 1,
 'grad_clip': None,
 'hidden_size': 2200,
 'ignore_columns': None,
 'init_lr': 0.0001,
 'log_frequency': 10,
 'max_data_size': None,
 'max_lr': 0.001,
 'metric': 'mae',
 'metrics': ['mae'],
 'minimize_score': True,
 'mpn_shared': False,
 'multiclass_num_classes': 3,
 'no_cache_mol': False,
 'no_cuda': False,
 'no_features_scaling': True,
 'num_folds': 10,
 'num_lrs': 1,
 'num_tasks': 1,
 'num_workers': 8,
 'number_of_molecules': 1,
 'pytorch_seed': 0,
 'quiet': True,
 'save_dir': '/home/saxelrod/synthetic/free_energy_ffn_fixed/train',
 'save_preds': False,
 'save_smiles_splits': False,
 'seed': 0,
 'separate_test_features_path': None,
 'separate_test_path': '/home/saxelrod/synthetic/free_energy/test_full.csv',
 'separate_val_features_path': None,
 'separate_val_path': '/home/saxelrod/synthetic/free_energy/val_full.csv',
 'show_individual_scores': False,
 'smiles_columns': [None],
 'split_sizes': (0.8, 0.1, 0.1),
 'split_type': 'random',
 'target_columns': None,
 'task_names': ['ensemblefreeenergy'],
 'test': False,
 'test_fold_index': None,
 'train_data_size': None,
 'undirected': False,
 'use_input_features': True,
 'val_fold_index': None,
 'warmup_epochs': 2.0}
Loading data
Number of tasks = 1
Fold 0
Splitting data with seed 0
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=2200, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=2200, out_features=2200, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=2200, out_features=1, bias=True)
  )
)
Number of parameters = 9,352,201
Moving model to cuda
Epoch 0
Loss = 8.5744e-01, PNorm = 65.7661, GNorm = 2.3578, lr_0 = 1.0413e-04
Loss = 6.5919e-01, PNorm = 65.7793, GNorm = 2.9064, lr_0 = 1.0788e-04
Loss = 5.5336e-01, PNorm = 65.7933, GNorm = 3.4424, lr_0 = 1.1163e-04
Loss = 4.7226e-01, PNorm = 65.8048, GNorm = 2.1010, lr_0 = 1.1537e-04
Loss = 4.4125e-01, PNorm = 65.8150, GNorm = 1.6864, lr_0 = 1.1913e-04
Loss = 4.4204e-01, PNorm = 65.8243, GNorm = 2.5656, lr_0 = 1.2287e-04
Loss = 4.3509e-01, PNorm = 65.8339, GNorm = 1.8219, lr_0 = 1.2663e-04
Loss = 3.6867e-01, PNorm = 65.8423, GNorm = 2.3506, lr_0 = 1.3038e-04
Loss = 3.8762e-01, PNorm = 65.8521, GNorm = 2.0251, lr_0 = 1.3413e-04
Loss = 3.5290e-01, PNorm = 65.8631, GNorm = 2.1011, lr_0 = 1.3788e-04
Loss = 3.8681e-01, PNorm = 65.8734, GNorm = 2.8052, lr_0 = 1.4163e-04
Loss = 4.1581e-01, PNorm = 65.8823, GNorm = 1.5501, lr_0 = 1.4537e-04
Loss = 3.5695e-01, PNorm = 65.8949, GNorm = 1.7983, lr_0 = 1.4913e-04
Loss = 3.8265e-01, PNorm = 65.9062, GNorm = 1.9861, lr_0 = 1.5288e-04
Loss = 3.5482e-01, PNorm = 65.9148, GNorm = 2.3074, lr_0 = 1.5662e-04
Loss = 3.7464e-01, PNorm = 65.9253, GNorm = 2.7204, lr_0 = 1.6038e-04
Loss = 3.9719e-01, PNorm = 65.9398, GNorm = 2.7173, lr_0 = 1.6412e-04
Loss = 3.9259e-01, PNorm = 65.9534, GNorm = 1.8820, lr_0 = 1.6788e-04
Loss = 3.2754e-01, PNorm = 65.9642, GNorm = 1.6274, lr_0 = 1.7163e-04
Loss = 3.8224e-01, PNorm = 65.9769, GNorm = 1.5183, lr_0 = 1.7538e-04
Loss = 3.5630e-01, PNorm = 65.9901, GNorm = 3.0831, lr_0 = 1.7913e-04
Loss = 3.8176e-01, PNorm = 66.0034, GNorm = 2.1640, lr_0 = 1.8288e-04
Loss = 3.0713e-01, PNorm = 66.0186, GNorm = 1.4191, lr_0 = 1.8662e-04
Loss = 3.5259e-01, PNorm = 66.0335, GNorm = 2.1138, lr_0 = 1.9038e-04
Loss = 3.5520e-01, PNorm = 66.0459, GNorm = 1.3245, lr_0 = 1.9413e-04
Loss = 3.2831e-01, PNorm = 66.0618, GNorm = 1.3753, lr_0 = 1.9788e-04
Loss = 3.6986e-01, PNorm = 66.0784, GNorm = 1.8131, lr_0 = 2.0163e-04
Loss = 3.3062e-01, PNorm = 66.0950, GNorm = 1.9610, lr_0 = 2.0537e-04
Loss = 3.3301e-01, PNorm = 66.1100, GNorm = 2.0639, lr_0 = 2.0913e-04
Loss = 3.2619e-01, PNorm = 66.1255, GNorm = 1.9302, lr_0 = 2.1288e-04
Loss = 3.7010e-01, PNorm = 66.1447, GNorm = 1.3796, lr_0 = 2.1663e-04
Loss = 3.2206e-01, PNorm = 66.1620, GNorm = 2.3818, lr_0 = 2.2038e-04
Loss = 3.8065e-01, PNorm = 66.1800, GNorm = 2.5065, lr_0 = 2.2412e-04
Loss = 2.7782e-01, PNorm = 66.1997, GNorm = 1.5339, lr_0 = 2.2787e-04
Loss = 3.5377e-01, PNorm = 66.2153, GNorm = 1.5330, lr_0 = 2.3163e-04
Loss = 2.9410e-01, PNorm = 66.2363, GNorm = 1.8669, lr_0 = 2.3538e-04
Loss = 2.9334e-01, PNorm = 66.2567, GNorm = 1.4955, lr_0 = 2.3913e-04
Loss = 3.2162e-01, PNorm = 66.2731, GNorm = 1.8230, lr_0 = 2.4288e-04
Loss = 3.0769e-01, PNorm = 66.2920, GNorm = 1.1240, lr_0 = 2.4662e-04
Loss = 3.0139e-01, PNorm = 66.3125, GNorm = 1.5296, lr_0 = 2.5038e-04
Loss = 3.1300e-01, PNorm = 66.3276, GNorm = 1.4897, lr_0 = 2.5413e-04
Loss = 3.4350e-01, PNorm = 66.3533, GNorm = 1.9722, lr_0 = 2.5788e-04
Loss = 3.0586e-01, PNorm = 66.3748, GNorm = 1.2035, lr_0 = 2.6163e-04
Loss = 2.9450e-01, PNorm = 66.3997, GNorm = 1.7532, lr_0 = 2.6537e-04
Loss = 2.9493e-01, PNorm = 66.4213, GNorm = 1.3570, lr_0 = 2.6912e-04
Loss = 3.5397e-01, PNorm = 66.4449, GNorm = 1.5049, lr_0 = 2.7288e-04
Loss = 3.1635e-01, PNorm = 66.4694, GNorm = 1.5783, lr_0 = 2.7663e-04
Loss = 3.1539e-01, PNorm = 66.4905, GNorm = 1.4166, lr_0 = 2.8038e-04
Loss = 3.1192e-01, PNorm = 66.5184, GNorm = 1.4459, lr_0 = 2.8413e-04
Loss = 2.8881e-01, PNorm = 66.5425, GNorm = 1.4682, lr_0 = 2.8787e-04
Loss = 2.6765e-01, PNorm = 66.5647, GNorm = 1.8997, lr_0 = 2.9163e-04
Loss = 2.7824e-01, PNorm = 66.5933, GNorm = 1.7526, lr_0 = 2.9538e-04
Loss = 3.0688e-01, PNorm = 66.6117, GNorm = 1.5923, lr_0 = 2.9913e-04
Loss = 3.1224e-01, PNorm = 66.6445, GNorm = 1.5550, lr_0 = 3.0288e-04
Loss = 3.0152e-01, PNorm = 66.6686, GNorm = 1.2273, lr_0 = 3.0662e-04
Loss = 2.5836e-01, PNorm = 66.6924, GNorm = 1.3630, lr_0 = 3.1037e-04
Loss = 2.8497e-01, PNorm = 66.7204, GNorm = 1.3182, lr_0 = 3.1413e-04
Loss = 2.8946e-01, PNorm = 66.7470, GNorm = 1.3766, lr_0 = 3.1788e-04
Loss = 2.5806e-01, PNorm = 66.7713, GNorm = 1.0254, lr_0 = 3.2163e-04
Loss = 2.6709e-01, PNorm = 66.7977, GNorm = 1.3630, lr_0 = 3.2538e-04
Loss = 2.9034e-01, PNorm = 66.8229, GNorm = 1.6381, lr_0 = 3.2912e-04
Loss = 2.9939e-01, PNorm = 66.8523, GNorm = 1.1507, lr_0 = 3.3288e-04
Loss = 2.6204e-01, PNorm = 66.8815, GNorm = 1.0598, lr_0 = 3.3663e-04
Loss = 2.7727e-01, PNorm = 66.9134, GNorm = 1.3243, lr_0 = 3.4038e-04
Loss = 2.6875e-01, PNorm = 66.9450, GNorm = 1.7387, lr_0 = 3.4413e-04
Loss = 2.7765e-01, PNorm = 66.9750, GNorm = 1.1580, lr_0 = 3.4787e-04
Loss = 2.6971e-01, PNorm = 67.0137, GNorm = 1.3541, lr_0 = 3.5162e-04
Loss = 2.6912e-01, PNorm = 67.0422, GNorm = 1.2867, lr_0 = 3.5538e-04
Loss = 2.7405e-01, PNorm = 67.0750, GNorm = 1.3933, lr_0 = 3.5913e-04
Loss = 2.9476e-01, PNorm = 67.1095, GNorm = 1.4982, lr_0 = 3.6288e-04
Loss = 2.6426e-01, PNorm = 67.1403, GNorm = 1.0518, lr_0 = 3.6662e-04
Loss = 2.5442e-01, PNorm = 67.1765, GNorm = 1.0992, lr_0 = 3.7037e-04
Loss = 2.9739e-01, PNorm = 67.2129, GNorm = 1.9572, lr_0 = 3.7413e-04
Loss = 2.9457e-01, PNorm = 67.2456, GNorm = 1.6435, lr_0 = 3.7788e-04
Loss = 2.5491e-01, PNorm = 67.2824, GNorm = 1.3966, lr_0 = 3.8163e-04
Loss = 2.8974e-01, PNorm = 67.3199, GNorm = 1.2609, lr_0 = 3.8537e-04
Loss = 2.6818e-01, PNorm = 67.3602, GNorm = 1.3208, lr_0 = 3.8912e-04
Loss = 2.8909e-01, PNorm = 67.3954, GNorm = 0.8605, lr_0 = 3.9287e-04
Loss = 2.6175e-01, PNorm = 67.4394, GNorm = 0.9696, lr_0 = 3.9663e-04
Loss = 3.0573e-01, PNorm = 67.4806, GNorm = 1.4532, lr_0 = 4.0038e-04
Loss = 3.1285e-01, PNorm = 67.5262, GNorm = 1.3788, lr_0 = 4.0413e-04
Loss = 2.9109e-01, PNorm = 67.5670, GNorm = 1.1420, lr_0 = 4.0787e-04
Loss = 2.6873e-01, PNorm = 67.6084, GNorm = 1.0208, lr_0 = 4.1162e-04
Loss = 2.6522e-01, PNorm = 67.6511, GNorm = 1.6134, lr_0 = 4.1537e-04
Loss = 2.4103e-01, PNorm = 67.6922, GNorm = 1.4439, lr_0 = 4.1913e-04
Loss = 2.6706e-01, PNorm = 67.7337, GNorm = 1.2477, lr_0 = 4.2288e-04
Loss = 2.5502e-01, PNorm = 67.7779, GNorm = 1.0120, lr_0 = 4.2662e-04
Loss = 2.7628e-01, PNorm = 67.8197, GNorm = 0.9610, lr_0 = 4.3037e-04
Loss = 2.9673e-01, PNorm = 67.8677, GNorm = 1.1204, lr_0 = 4.3412e-04
Loss = 2.7724e-01, PNorm = 67.9156, GNorm = 1.1214, lr_0 = 4.3788e-04
Loss = 2.3719e-01, PNorm = 67.9607, GNorm = 0.9851, lr_0 = 4.4163e-04
Loss = 2.5670e-01, PNorm = 68.0048, GNorm = 0.8502, lr_0 = 4.4538e-04
Loss = 2.7445e-01, PNorm = 68.0521, GNorm = 1.4104, lr_0 = 4.4912e-04
Loss = 2.5512e-01, PNorm = 68.0964, GNorm = 1.0902, lr_0 = 4.5287e-04
Loss = 2.7442e-01, PNorm = 68.1479, GNorm = 1.3127, lr_0 = 4.5662e-04
Loss = 2.4304e-01, PNorm = 68.1938, GNorm = 1.2921, lr_0 = 4.6038e-04
Loss = 2.5870e-01, PNorm = 68.2416, GNorm = 1.1602, lr_0 = 4.6413e-04
Loss = 2.6694e-01, PNorm = 68.2861, GNorm = 0.8657, lr_0 = 4.6787e-04
Loss = 2.3165e-01, PNorm = 68.3302, GNorm = 0.9894, lr_0 = 4.7162e-04
Loss = 2.6502e-01, PNorm = 68.3716, GNorm = 1.0941, lr_0 = 4.7537e-04
Loss = 2.5990e-01, PNorm = 68.4192, GNorm = 1.1663, lr_0 = 4.7913e-04
Loss = 2.5671e-01, PNorm = 68.4727, GNorm = 1.1745, lr_0 = 4.8288e-04
Loss = 2.4788e-01, PNorm = 68.5199, GNorm = 0.9442, lr_0 = 4.8663e-04
Loss = 2.7641e-01, PNorm = 68.5822, GNorm = 0.8572, lr_0 = 4.9038e-04
Loss = 2.3576e-01, PNorm = 68.6309, GNorm = 0.8109, lr_0 = 4.9412e-04
Loss = 2.9210e-01, PNorm = 68.6894, GNorm = 1.3950, lr_0 = 4.9788e-04
Loss = 2.7804e-01, PNorm = 68.7422, GNorm = 0.9638, lr_0 = 5.0163e-04
Loss = 2.3066e-01, PNorm = 68.8068, GNorm = 0.9843, lr_0 = 5.0538e-04
Loss = 2.5981e-01, PNorm = 68.8695, GNorm = 1.1650, lr_0 = 5.0913e-04
Loss = 2.7085e-01, PNorm = 68.9249, GNorm = 0.7751, lr_0 = 5.1287e-04
Loss = 2.7825e-01, PNorm = 68.9921, GNorm = 1.2286, lr_0 = 5.1663e-04
Loss = 2.1668e-01, PNorm = 69.0500, GNorm = 1.1959, lr_0 = 5.2038e-04
Loss = 2.7991e-01, PNorm = 69.1080, GNorm = 1.1804, lr_0 = 5.2413e-04
Loss = 2.5710e-01, PNorm = 69.1739, GNorm = 0.8962, lr_0 = 5.2788e-04
Loss = 2.6679e-01, PNorm = 69.2340, GNorm = 0.7939, lr_0 = 5.3162e-04
Loss = 2.5117e-01, PNorm = 69.2905, GNorm = 0.9599, lr_0 = 5.3538e-04
Loss = 2.6479e-01, PNorm = 69.3448, GNorm = 1.0549, lr_0 = 5.3912e-04
Loss = 2.4540e-01, PNorm = 69.4063, GNorm = 1.8725, lr_0 = 5.4288e-04
Loss = 2.2917e-01, PNorm = 69.4587, GNorm = 1.1662, lr_0 = 5.4663e-04
Loss = 2.3973e-01, PNorm = 69.5207, GNorm = 1.0241, lr_0 = 5.5038e-04
Validation mae = 0.318435
Epoch 1
Loss = 1.7246e-01, PNorm = 69.5770, GNorm = 0.8098, lr_0 = 5.5413e-04
Loss = 1.8397e-01, PNorm = 69.6356, GNorm = 0.7805, lr_0 = 5.5787e-04
Loss = 1.5034e-01, PNorm = 69.6959, GNorm = 0.6297, lr_0 = 5.6163e-04
Loss = 1.6173e-01, PNorm = 69.7460, GNorm = 1.0318, lr_0 = 5.6538e-04
Loss = 1.8639e-01, PNorm = 69.8067, GNorm = 0.7289, lr_0 = 5.6913e-04
Loss = 1.5894e-01, PNorm = 69.8667, GNorm = 0.7536, lr_0 = 5.7288e-04
Loss = 1.5617e-01, PNorm = 69.9276, GNorm = 0.8160, lr_0 = 5.7662e-04
Loss = 1.7967e-01, PNorm = 69.9875, GNorm = 0.8557, lr_0 = 5.8038e-04
Loss = 1.5291e-01, PNorm = 70.0441, GNorm = 1.1207, lr_0 = 5.8413e-04
Loss = 1.8089e-01, PNorm = 70.0994, GNorm = 1.3975, lr_0 = 5.8788e-04
Loss = 1.7592e-01, PNorm = 70.1689, GNorm = 0.8560, lr_0 = 5.9163e-04
Loss = 1.7435e-01, PNorm = 70.2416, GNorm = 0.8486, lr_0 = 5.9538e-04
Loss = 1.7528e-01, PNorm = 70.3104, GNorm = 0.9158, lr_0 = 5.9913e-04
Loss = 1.8502e-01, PNorm = 70.3831, GNorm = 0.7349, lr_0 = 6.0288e-04
Loss = 1.7337e-01, PNorm = 70.4524, GNorm = 0.7410, lr_0 = 6.0663e-04
Loss = 1.7968e-01, PNorm = 70.5249, GNorm = 1.1447, lr_0 = 6.1038e-04
Loss = 1.6809e-01, PNorm = 70.5980, GNorm = 0.9064, lr_0 = 6.1413e-04
Loss = 1.7306e-01, PNorm = 70.6744, GNorm = 0.9409, lr_0 = 6.1788e-04
Loss = 1.6592e-01, PNorm = 70.7482, GNorm = 0.7687, lr_0 = 6.2163e-04
Loss = 1.9371e-01, PNorm = 70.8137, GNorm = 1.3164, lr_0 = 6.2538e-04
Loss = 1.9775e-01, PNorm = 70.8941, GNorm = 0.8184, lr_0 = 6.2913e-04
Loss = 1.8123e-01, PNorm = 70.9795, GNorm = 1.0046, lr_0 = 6.3288e-04
Loss = 1.8991e-01, PNorm = 71.0516, GNorm = 0.7569, lr_0 = 6.3663e-04
Loss = 1.7792e-01, PNorm = 71.1395, GNorm = 1.0761, lr_0 = 6.4038e-04
Loss = 1.8912e-01, PNorm = 71.2141, GNorm = 0.6779, lr_0 = 6.4413e-04
Loss = 1.8413e-01, PNorm = 71.2999, GNorm = 1.2495, lr_0 = 6.4788e-04
Loss = 1.9513e-01, PNorm = 71.3840, GNorm = 1.0316, lr_0 = 6.5163e-04
Loss = 1.9527e-01, PNorm = 71.4767, GNorm = 1.1749, lr_0 = 6.5538e-04
Loss = 2.0466e-01, PNorm = 71.5632, GNorm = 0.9457, lr_0 = 6.5913e-04
Loss = 2.0231e-01, PNorm = 71.6514, GNorm = 0.9130, lr_0 = 6.6288e-04
Loss = 1.9673e-01, PNorm = 71.7452, GNorm = 0.9567, lr_0 = 6.6663e-04
Loss = 1.8517e-01, PNorm = 71.8366, GNorm = 0.5115, lr_0 = 6.7038e-04
Loss = 1.7784e-01, PNorm = 71.9337, GNorm = 0.8798, lr_0 = 6.7413e-04
Loss = 1.9913e-01, PNorm = 72.0317, GNorm = 1.1120, lr_0 = 6.7788e-04
Loss = 1.7711e-01, PNorm = 72.1307, GNorm = 0.6818, lr_0 = 6.8163e-04
Loss = 1.9365e-01, PNorm = 72.2327, GNorm = 0.6889, lr_0 = 6.8538e-04
Loss = 1.9167e-01, PNorm = 72.3151, GNorm = 0.9352, lr_0 = 6.8913e-04
Loss = 1.9928e-01, PNorm = 72.4248, GNorm = 0.7937, lr_0 = 6.9288e-04
Loss = 1.9328e-01, PNorm = 72.5213, GNorm = 0.6957, lr_0 = 6.9663e-04
Loss = 2.0507e-01, PNorm = 72.6236, GNorm = 0.8539, lr_0 = 7.0038e-04
Loss = 2.0327e-01, PNorm = 72.7258, GNorm = 1.3933, lr_0 = 7.0413e-04
Loss = 1.9629e-01, PNorm = 72.8274, GNorm = 0.8963, lr_0 = 7.0788e-04
Loss = 2.0287e-01, PNorm = 72.9283, GNorm = 0.7935, lr_0 = 7.1163e-04
Loss = 2.0957e-01, PNorm = 73.0274, GNorm = 1.2216, lr_0 = 7.1538e-04
Loss = 1.8463e-01, PNorm = 73.1378, GNorm = 1.0810, lr_0 = 7.1913e-04
Loss = 2.1157e-01, PNorm = 73.2314, GNorm = 0.8684, lr_0 = 7.2288e-04
Loss = 2.0372e-01, PNorm = 73.3474, GNorm = 0.8327, lr_0 = 7.2663e-04
Loss = 1.9473e-01, PNorm = 73.4516, GNorm = 0.6779, lr_0 = 7.3038e-04
Loss = 1.9541e-01, PNorm = 73.5695, GNorm = 0.7462, lr_0 = 7.3413e-04
Loss = 1.9988e-01, PNorm = 73.6798, GNorm = 0.8907, lr_0 = 7.3788e-04
Loss = 2.1007e-01, PNorm = 73.7917, GNorm = 0.7533, lr_0 = 7.4163e-04
Loss = 1.8092e-01, PNorm = 73.9001, GNorm = 0.9079, lr_0 = 7.4538e-04
Loss = 1.6120e-01, PNorm = 74.0077, GNorm = 0.7178, lr_0 = 7.4913e-04
Loss = 1.7633e-01, PNorm = 74.1139, GNorm = 0.8235, lr_0 = 7.5288e-04
Loss = 1.7810e-01, PNorm = 74.2137, GNorm = 0.7442, lr_0 = 7.5663e-04
Loss = 2.1720e-01, PNorm = 74.3299, GNorm = 0.8834, lr_0 = 7.6038e-04
Loss = 2.0629e-01, PNorm = 74.4460, GNorm = 0.7657, lr_0 = 7.6413e-04
Loss = 2.0935e-01, PNorm = 74.5655, GNorm = 0.9954, lr_0 = 7.6788e-04
Loss = 1.9987e-01, PNorm = 74.6759, GNorm = 1.0206, lr_0 = 7.7163e-04
Loss = 2.3026e-01, PNorm = 74.7846, GNorm = 0.7382, lr_0 = 7.7538e-04
Loss = 2.1402e-01, PNorm = 74.9132, GNorm = 0.8849, lr_0 = 7.7913e-04
Loss = 1.9877e-01, PNorm = 75.0421, GNorm = 0.9386, lr_0 = 7.8288e-04
Loss = 2.1052e-01, PNorm = 75.1703, GNorm = 0.8203, lr_0 = 7.8663e-04
Loss = 1.8044e-01, PNorm = 75.3029, GNorm = 1.0960, lr_0 = 7.9038e-04
Loss = 1.8227e-01, PNorm = 75.4142, GNorm = 0.6056, lr_0 = 7.9413e-04
Loss = 2.2552e-01, PNorm = 75.5454, GNorm = 1.3552, lr_0 = 7.9788e-04
Loss = 2.2241e-01, PNorm = 75.6776, GNorm = 1.1190, lr_0 = 8.0163e-04
Loss = 1.9819e-01, PNorm = 75.8257, GNorm = 0.9457, lr_0 = 8.0538e-04
Loss = 2.2805e-01, PNorm = 75.9674, GNorm = 0.9822, lr_0 = 8.0913e-04
Loss = 2.2322e-01, PNorm = 76.1153, GNorm = 0.8832, lr_0 = 8.1288e-04
Loss = 2.0596e-01, PNorm = 76.2621, GNorm = 1.0931, lr_0 = 8.1663e-04
Loss = 1.9527e-01, PNorm = 76.4018, GNorm = 1.2547, lr_0 = 8.2038e-04
Loss = 2.0880e-01, PNorm = 76.5343, GNorm = 0.7666, lr_0 = 8.2413e-04
Loss = 1.9813e-01, PNorm = 76.6705, GNorm = 0.8892, lr_0 = 8.2788e-04
Loss = 2.1914e-01, PNorm = 76.8039, GNorm = 0.7613, lr_0 = 8.3163e-04
Loss = 2.0058e-01, PNorm = 76.9354, GNorm = 0.9060, lr_0 = 8.3538e-04
Loss = 2.0403e-01, PNorm = 77.0703, GNorm = 1.9215, lr_0 = 8.3913e-04
Loss = 1.9838e-01, PNorm = 77.2005, GNorm = 0.8160, lr_0 = 8.4288e-04
Loss = 2.1940e-01, PNorm = 77.3270, GNorm = 1.2977, lr_0 = 8.4663e-04
Loss = 2.2968e-01, PNorm = 77.4568, GNorm = 0.9086, lr_0 = 8.5038e-04
Loss = 2.0416e-01, PNorm = 77.5921, GNorm = 1.1020, lr_0 = 8.5413e-04
Loss = 2.0668e-01, PNorm = 77.7075, GNorm = 0.9773, lr_0 = 8.5788e-04
Loss = 2.0072e-01, PNorm = 77.8355, GNorm = 0.8523, lr_0 = 8.6163e-04
Loss = 2.0509e-01, PNorm = 77.9653, GNorm = 0.7964, lr_0 = 8.6538e-04
Loss = 2.0748e-01, PNorm = 78.1007, GNorm = 0.7375, lr_0 = 8.6913e-04
Loss = 2.0340e-01, PNorm = 78.2442, GNorm = 0.5853, lr_0 = 8.7288e-04
Loss = 2.2453e-01, PNorm = 78.3769, GNorm = 0.8380, lr_0 = 8.7663e-04
Loss = 1.8935e-01, PNorm = 78.5088, GNorm = 0.9827, lr_0 = 8.8038e-04
Loss = 2.0075e-01, PNorm = 78.6440, GNorm = 0.8918, lr_0 = 8.8413e-04
Loss = 1.8630e-01, PNorm = 78.7879, GNorm = 0.8313, lr_0 = 8.8788e-04
Loss = 2.0948e-01, PNorm = 78.9335, GNorm = 0.6686, lr_0 = 8.9163e-04
Loss = 2.2128e-01, PNorm = 79.0847, GNorm = 0.8828, lr_0 = 8.9538e-04
Loss = 2.1181e-01, PNorm = 79.2367, GNorm = 0.7833, lr_0 = 8.9913e-04
Loss = 2.3664e-01, PNorm = 79.3802, GNorm = 0.9260, lr_0 = 9.0288e-04
Loss = 2.4146e-01, PNorm = 79.5318, GNorm = 1.1072, lr_0 = 9.0663e-04
Loss = 2.0628e-01, PNorm = 79.6867, GNorm = 0.8985, lr_0 = 9.1038e-04
Loss = 2.2299e-01, PNorm = 79.8341, GNorm = 1.2473, lr_0 = 9.1413e-04
Loss = 2.1442e-01, PNorm = 79.9669, GNorm = 0.8162, lr_0 = 9.1788e-04
Loss = 2.1580e-01, PNorm = 80.1202, GNorm = 0.8439, lr_0 = 9.2163e-04
Loss = 1.9817e-01, PNorm = 80.2593, GNorm = 0.8175, lr_0 = 9.2538e-04
Loss = 2.3381e-01, PNorm = 80.4135, GNorm = 0.9612, lr_0 = 9.2913e-04
Loss = 2.2874e-01, PNorm = 80.5488, GNorm = 1.0850, lr_0 = 9.3288e-04
Loss = 2.0782e-01, PNorm = 80.6888, GNorm = 0.7407, lr_0 = 9.3663e-04
Loss = 2.1536e-01, PNorm = 80.8338, GNorm = 0.7985, lr_0 = 9.4038e-04
Loss = 1.9931e-01, PNorm = 80.9752, GNorm = 0.6258, lr_0 = 9.4413e-04
Loss = 2.0080e-01, PNorm = 81.1076, GNorm = 0.7714, lr_0 = 9.4788e-04
Loss = 2.1616e-01, PNorm = 81.2557, GNorm = 1.0814, lr_0 = 9.5163e-04
Loss = 2.2122e-01, PNorm = 81.4082, GNorm = 0.7688, lr_0 = 9.5538e-04
Loss = 2.1990e-01, PNorm = 81.5781, GNorm = 1.4485, lr_0 = 9.5913e-04
Loss = 1.8344e-01, PNorm = 81.7143, GNorm = 1.0359, lr_0 = 9.6288e-04
Loss = 2.3204e-01, PNorm = 81.8619, GNorm = 2.2318, lr_0 = 9.6663e-04
Loss = 2.1749e-01, PNorm = 81.9892, GNorm = 0.7429, lr_0 = 9.7038e-04
Loss = 1.9017e-01, PNorm = 82.1467, GNorm = 1.2907, lr_0 = 9.7413e-04
Loss = 2.0354e-01, PNorm = 82.2779, GNorm = 0.8548, lr_0 = 9.7788e-04
Loss = 2.3441e-01, PNorm = 82.4121, GNorm = 0.9148, lr_0 = 9.8163e-04
Loss = 2.3488e-01, PNorm = 82.5531, GNorm = 0.7264, lr_0 = 9.8537e-04
Loss = 2.3410e-01, PNorm = 82.7025, GNorm = 1.1444, lr_0 = 9.8912e-04
Loss = 2.0592e-01, PNorm = 82.8537, GNorm = 0.7696, lr_0 = 9.9288e-04
Loss = 2.0978e-01, PNorm = 83.0194, GNorm = 0.9884, lr_0 = 9.9663e-04
Loss = 2.5690e-01, PNorm = 83.1655, GNorm = 0.9462, lr_0 = 9.9993e-04
Validation mae = 0.318196
Epoch 2
Loss = 1.4313e-01, PNorm = 83.3311, GNorm = 0.5798, lr_0 = 9.9925e-04
Loss = 1.3005e-01, PNorm = 83.4584, GNorm = 0.7159, lr_0 = 9.9856e-04
Loss = 1.2347e-01, PNorm = 83.5783, GNorm = 0.6654, lr_0 = 9.9788e-04
Loss = 1.4140e-01, PNorm = 83.6948, GNorm = 0.6901, lr_0 = 9.9719e-04
Loss = 1.2131e-01, PNorm = 83.8003, GNorm = 0.8237, lr_0 = 9.9651e-04
Loss = 1.3968e-01, PNorm = 83.9057, GNorm = 1.2064, lr_0 = 9.9583e-04
Loss = 1.2615e-01, PNorm = 84.0333, GNorm = 0.4796, lr_0 = 9.9515e-04
Loss = 1.3247e-01, PNorm = 84.1471, GNorm = 1.0196, lr_0 = 9.9446e-04
Loss = 1.2531e-01, PNorm = 84.2713, GNorm = 0.6698, lr_0 = 9.9378e-04
Loss = 1.3571e-01, PNorm = 84.3794, GNorm = 0.5193, lr_0 = 9.9310e-04
Loss = 1.3547e-01, PNorm = 84.5066, GNorm = 0.9283, lr_0 = 9.9242e-04
Loss = 1.3477e-01, PNorm = 84.6207, GNorm = 0.5609, lr_0 = 9.9174e-04
Loss = 1.2970e-01, PNorm = 84.7305, GNorm = 0.6382, lr_0 = 9.9106e-04
Loss = 1.1007e-01, PNorm = 84.8405, GNorm = 0.5223, lr_0 = 9.9038e-04
Loss = 1.3365e-01, PNorm = 84.9626, GNorm = 0.7650, lr_0 = 9.8971e-04
Loss = 1.2633e-01, PNorm = 85.0942, GNorm = 1.0785, lr_0 = 9.8903e-04
Loss = 1.2296e-01, PNorm = 85.2179, GNorm = 0.5851, lr_0 = 9.8835e-04
Loss = 1.4059e-01, PNorm = 85.3345, GNorm = 0.6125, lr_0 = 9.8767e-04
Loss = 1.2005e-01, PNorm = 85.4522, GNorm = 0.6544, lr_0 = 9.8700e-04
Loss = 1.1148e-01, PNorm = 85.5715, GNorm = 0.5617, lr_0 = 9.8632e-04
Loss = 1.1167e-01, PNorm = 85.6926, GNorm = 0.5133, lr_0 = 9.8564e-04
Loss = 1.2030e-01, PNorm = 85.8115, GNorm = 0.5946, lr_0 = 9.8497e-04
Loss = 1.4211e-01, PNorm = 85.9433, GNorm = 0.6328, lr_0 = 9.8429e-04
Loss = 1.2160e-01, PNorm = 86.0652, GNorm = 0.5581, lr_0 = 9.8362e-04
Loss = 1.2949e-01, PNorm = 86.1952, GNorm = 0.5017, lr_0 = 9.8295e-04
Loss = 1.1845e-01, PNorm = 86.3082, GNorm = 0.6788, lr_0 = 9.8227e-04
Loss = 1.3545e-01, PNorm = 86.4296, GNorm = 0.5243, lr_0 = 9.8160e-04
Loss = 1.3577e-01, PNorm = 86.5557, GNorm = 0.7655, lr_0 = 9.8093e-04
Loss = 1.1969e-01, PNorm = 86.6903, GNorm = 0.4998, lr_0 = 9.8026e-04
Loss = 1.2504e-01, PNorm = 86.8330, GNorm = 0.7388, lr_0 = 9.7958e-04
Loss = 1.4146e-01, PNorm = 86.9636, GNorm = 0.4803, lr_0 = 9.7891e-04
Loss = 1.2785e-01, PNorm = 87.1093, GNorm = 0.7043, lr_0 = 9.7824e-04
Loss = 1.2883e-01, PNorm = 87.2401, GNorm = 0.6061, lr_0 = 9.7757e-04
Loss = 1.1874e-01, PNorm = 87.3699, GNorm = 0.7808, lr_0 = 9.7690e-04
Loss = 1.2104e-01, PNorm = 87.4863, GNorm = 0.6058, lr_0 = 9.7623e-04
Loss = 1.3522e-01, PNorm = 87.6115, GNorm = 0.5048, lr_0 = 9.7556e-04
Loss = 1.1582e-01, PNorm = 87.7325, GNorm = 0.4720, lr_0 = 9.7490e-04
Loss = 1.3946e-01, PNorm = 87.8551, GNorm = 0.5197, lr_0 = 9.7423e-04
Loss = 1.1645e-01, PNorm = 87.9750, GNorm = 0.8168, lr_0 = 9.7356e-04
Loss = 1.2079e-01, PNorm = 88.1044, GNorm = 0.8119, lr_0 = 9.7289e-04
Loss = 1.3240e-01, PNorm = 88.2147, GNorm = 0.6736, lr_0 = 9.7223e-04
Loss = 1.3030e-01, PNorm = 88.3318, GNorm = 0.6638, lr_0 = 9.7156e-04
Loss = 1.1726e-01, PNorm = 88.4539, GNorm = 0.5314, lr_0 = 9.7090e-04
Loss = 1.3729e-01, PNorm = 88.5727, GNorm = 0.8067, lr_0 = 9.7023e-04
Loss = 1.3062e-01, PNorm = 88.6911, GNorm = 1.2599, lr_0 = 9.6957e-04
Loss = 1.5146e-01, PNorm = 88.8163, GNorm = 0.6791, lr_0 = 9.6890e-04
Loss = 1.5161e-01, PNorm = 88.9586, GNorm = 0.6319, lr_0 = 9.6824e-04
Loss = 1.1795e-01, PNorm = 89.0941, GNorm = 0.8444, lr_0 = 9.6757e-04
Loss = 1.4084e-01, PNorm = 89.2217, GNorm = 0.8281, lr_0 = 9.6691e-04
Loss = 1.2702e-01, PNorm = 89.3571, GNorm = 0.8254, lr_0 = 9.6625e-04
Loss = 1.2387e-01, PNorm = 89.4990, GNorm = 0.5158, lr_0 = 9.6559e-04
Loss = 1.4392e-01, PNorm = 89.6259, GNorm = 0.6425, lr_0 = 9.6493e-04
Loss = 1.5511e-01, PNorm = 89.7770, GNorm = 0.5317, lr_0 = 9.6427e-04
Loss = 1.3535e-01, PNorm = 89.9325, GNorm = 0.8494, lr_0 = 9.6360e-04
Loss = 1.4132e-01, PNorm = 90.0730, GNorm = 0.5404, lr_0 = 9.6294e-04
Loss = 1.3974e-01, PNorm = 90.2161, GNorm = 0.8074, lr_0 = 9.6228e-04
Loss = 1.4089e-01, PNorm = 90.3609, GNorm = 0.5685, lr_0 = 9.6163e-04
Loss = 1.3252e-01, PNorm = 90.4929, GNorm = 0.7849, lr_0 = 9.6097e-04
Loss = 1.3426e-01, PNorm = 90.6350, GNorm = 0.5262, lr_0 = 9.6031e-04
Loss = 1.6638e-01, PNorm = 90.7617, GNorm = 0.5221, lr_0 = 9.5965e-04
Loss = 1.1982e-01, PNorm = 90.9056, GNorm = 0.5955, lr_0 = 9.5899e-04
Loss = 1.4618e-01, PNorm = 91.0252, GNorm = 0.6706, lr_0 = 9.5834e-04
Loss = 1.2152e-01, PNorm = 91.1574, GNorm = 0.6522, lr_0 = 9.5768e-04
Loss = 1.3749e-01, PNorm = 91.2899, GNorm = 0.5466, lr_0 = 9.5702e-04
Loss = 1.3865e-01, PNorm = 91.4124, GNorm = 0.5841, lr_0 = 9.5637e-04
Loss = 1.2638e-01, PNorm = 91.5411, GNorm = 0.5519, lr_0 = 9.5571e-04
Loss = 1.4164e-01, PNorm = 91.6690, GNorm = 0.6714, lr_0 = 9.5506e-04
Loss = 1.4297e-01, PNorm = 91.7904, GNorm = 0.8279, lr_0 = 9.5440e-04
Loss = 1.6367e-01, PNorm = 91.9259, GNorm = 0.6119, lr_0 = 9.5375e-04
Loss = 1.2663e-01, PNorm = 92.0620, GNorm = 0.6225, lr_0 = 9.5310e-04
Loss = 1.3740e-01, PNorm = 92.1972, GNorm = 0.7287, lr_0 = 9.5244e-04
Loss = 1.3208e-01, PNorm = 92.3252, GNorm = 0.6078, lr_0 = 9.5179e-04
Loss = 1.4922e-01, PNorm = 92.4509, GNorm = 0.5390, lr_0 = 9.5114e-04
Loss = 1.3897e-01, PNorm = 92.5767, GNorm = 0.6024, lr_0 = 9.5049e-04
Loss = 1.3829e-01, PNorm = 92.7116, GNorm = 0.5628, lr_0 = 9.4984e-04
Loss = 1.2836e-01, PNorm = 92.8467, GNorm = 0.6028, lr_0 = 9.4919e-04
Loss = 1.4183e-01, PNorm = 92.9638, GNorm = 0.6456, lr_0 = 9.4854e-04
Loss = 1.4652e-01, PNorm = 93.0936, GNorm = 0.8035, lr_0 = 9.4789e-04
Loss = 1.3917e-01, PNorm = 93.2098, GNorm = 0.7933, lr_0 = 9.4724e-04
Loss = 1.4606e-01, PNorm = 93.3330, GNorm = 1.3422, lr_0 = 9.4659e-04
Loss = 1.3878e-01, PNorm = 93.4534, GNorm = 0.4462, lr_0 = 9.4594e-04
Loss = 1.5274e-01, PNorm = 93.5818, GNorm = 1.0974, lr_0 = 9.4529e-04
Loss = 1.3414e-01, PNorm = 93.7153, GNorm = 0.4461, lr_0 = 9.4464e-04
Loss = 1.4047e-01, PNorm = 93.8456, GNorm = 0.8883, lr_0 = 9.4400e-04
Loss = 1.5551e-01, PNorm = 93.9638, GNorm = 0.9928, lr_0 = 9.4335e-04
Loss = 1.5099e-01, PNorm = 94.0935, GNorm = 0.8746, lr_0 = 9.4270e-04
Loss = 1.3092e-01, PNorm = 94.2274, GNorm = 0.9293, lr_0 = 9.4206e-04
Loss = 1.5176e-01, PNorm = 94.3454, GNorm = 0.5645, lr_0 = 9.4141e-04
Loss = 1.5589e-01, PNorm = 94.4792, GNorm = 0.6395, lr_0 = 9.4077e-04
Loss = 1.5668e-01, PNorm = 94.5961, GNorm = 0.7916, lr_0 = 9.4012e-04
Loss = 1.4475e-01, PNorm = 94.7325, GNorm = 0.5258, lr_0 = 9.3948e-04
Loss = 1.3123e-01, PNorm = 94.8695, GNorm = 0.7177, lr_0 = 9.3884e-04
Loss = 1.6110e-01, PNorm = 94.9947, GNorm = 0.4597, lr_0 = 9.3819e-04
Loss = 1.3916e-01, PNorm = 95.1190, GNorm = 0.5026, lr_0 = 9.3755e-04
Loss = 1.3122e-01, PNorm = 95.2376, GNorm = 0.6592, lr_0 = 9.3691e-04
Loss = 1.4044e-01, PNorm = 95.3493, GNorm = 0.7880, lr_0 = 9.3627e-04
Loss = 1.3846e-01, PNorm = 95.4989, GNorm = 0.5061, lr_0 = 9.3562e-04
Loss = 1.4036e-01, PNorm = 95.6202, GNorm = 0.4104, lr_0 = 9.3498e-04
Loss = 1.4954e-01, PNorm = 95.7547, GNorm = 0.5884, lr_0 = 9.3434e-04
Loss = 1.3701e-01, PNorm = 95.8720, GNorm = 0.7282, lr_0 = 9.3370e-04
Loss = 1.4837e-01, PNorm = 95.9947, GNorm = 0.7650, lr_0 = 9.3306e-04
Loss = 1.5987e-01, PNorm = 96.1185, GNorm = 0.7791, lr_0 = 9.3242e-04
Loss = 1.4023e-01, PNorm = 96.2519, GNorm = 0.7768, lr_0 = 9.3178e-04
Loss = 1.4481e-01, PNorm = 96.3816, GNorm = 0.6946, lr_0 = 9.3115e-04
Loss = 1.5074e-01, PNorm = 96.5090, GNorm = 0.8216, lr_0 = 9.3051e-04
Loss = 1.3296e-01, PNorm = 96.6269, GNorm = 0.8980, lr_0 = 9.2987e-04
Loss = 1.3727e-01, PNorm = 96.7511, GNorm = 0.6393, lr_0 = 9.2923e-04
Loss = 1.3499e-01, PNorm = 96.8679, GNorm = 1.0275, lr_0 = 9.2860e-04
Loss = 1.5695e-01, PNorm = 96.9841, GNorm = 0.7558, lr_0 = 9.2796e-04
Loss = 1.3697e-01, PNorm = 97.1075, GNorm = 0.5671, lr_0 = 9.2733e-04
Loss = 1.5207e-01, PNorm = 97.2275, GNorm = 0.6897, lr_0 = 9.2669e-04
Loss = 1.5902e-01, PNorm = 97.3449, GNorm = 0.5499, lr_0 = 9.2606e-04
Loss = 1.3284e-01, PNorm = 97.4620, GNorm = 0.6680, lr_0 = 9.2542e-04
Loss = 1.5440e-01, PNorm = 97.5770, GNorm = 1.6284, lr_0 = 9.2479e-04
Loss = 1.5471e-01, PNorm = 97.6944, GNorm = 1.4121, lr_0 = 9.2415e-04
Loss = 1.3898e-01, PNorm = 97.8199, GNorm = 0.5149, lr_0 = 9.2352e-04
Loss = 1.5400e-01, PNorm = 97.9370, GNorm = 0.5701, lr_0 = 9.2289e-04
Loss = 1.6318e-01, PNorm = 98.0632, GNorm = 1.1453, lr_0 = 9.2226e-04
Loss = 1.3638e-01, PNorm = 98.1863, GNorm = 0.6513, lr_0 = 9.2162e-04
Loss = 1.5141e-01, PNorm = 98.3098, GNorm = 0.9459, lr_0 = 9.2099e-04
Validation mae = 0.301206
Epoch 3
Loss = 9.3495e-02, PNorm = 98.4243, GNorm = 0.6175, lr_0 = 9.2036e-04
Loss = 8.8677e-02, PNorm = 98.5322, GNorm = 0.4204, lr_0 = 9.1973e-04
Loss = 7.9043e-02, PNorm = 98.6154, GNorm = 0.4905, lr_0 = 9.1910e-04
Loss = 7.5801e-02, PNorm = 98.6973, GNorm = 0.4127, lr_0 = 9.1847e-04
Loss = 8.3432e-02, PNorm = 98.7655, GNorm = 1.0429, lr_0 = 9.1784e-04
Loss = 7.5679e-02, PNorm = 98.8418, GNorm = 0.5783, lr_0 = 9.1721e-04
Loss = 7.3345e-02, PNorm = 98.9097, GNorm = 0.4502, lr_0 = 9.1658e-04
Loss = 8.6798e-02, PNorm = 98.9899, GNorm = 1.4614, lr_0 = 9.1596e-04
Loss = 8.2518e-02, PNorm = 99.0565, GNorm = 1.1423, lr_0 = 9.1533e-04
Loss = 7.6458e-02, PNorm = 99.1323, GNorm = 0.5598, lr_0 = 9.1470e-04
Loss = 8.3407e-02, PNorm = 99.2102, GNorm = 0.5313, lr_0 = 9.1408e-04
Loss = 8.0338e-02, PNorm = 99.2815, GNorm = 0.3750, lr_0 = 9.1345e-04
Loss = 7.4601e-02, PNorm = 99.3533, GNorm = 0.2737, lr_0 = 9.1282e-04
Loss = 5.9465e-02, PNorm = 99.4163, GNorm = 0.3622, lr_0 = 9.1220e-04
Loss = 7.8617e-02, PNorm = 99.4993, GNorm = 0.5927, lr_0 = 9.1157e-04
Loss = 8.5440e-02, PNorm = 99.5678, GNorm = 0.4766, lr_0 = 9.1095e-04
Loss = 8.0385e-02, PNorm = 99.6611, GNorm = 0.3521, lr_0 = 9.1032e-04
Loss = 9.1997e-02, PNorm = 99.7341, GNorm = 0.3882, lr_0 = 9.0970e-04
Loss = 8.1170e-02, PNorm = 99.8204, GNorm = 0.6651, lr_0 = 9.0908e-04
Loss = 8.4282e-02, PNorm = 99.9023, GNorm = 0.9039, lr_0 = 9.0846e-04
Loss = 7.9521e-02, PNorm = 99.9855, GNorm = 0.5833, lr_0 = 9.0783e-04
Loss = 7.6683e-02, PNorm = 100.0669, GNorm = 0.4664, lr_0 = 9.0721e-04
Loss = 8.3860e-02, PNorm = 100.1450, GNorm = 0.4433, lr_0 = 9.0659e-04
Loss = 8.6625e-02, PNorm = 100.2325, GNorm = 0.4345, lr_0 = 9.0597e-04
Loss = 7.2300e-02, PNorm = 100.3167, GNorm = 0.5292, lr_0 = 9.0535e-04
Loss = 8.0183e-02, PNorm = 100.3922, GNorm = 0.3906, lr_0 = 9.0473e-04
Loss = 7.8394e-02, PNorm = 100.4749, GNorm = 0.5479, lr_0 = 9.0411e-04
Loss = 8.5394e-02, PNorm = 100.5513, GNorm = 0.6137, lr_0 = 9.0349e-04
Loss = 7.7974e-02, PNorm = 100.6386, GNorm = 0.6515, lr_0 = 9.0287e-04
Loss = 9.1821e-02, PNorm = 100.7188, GNorm = 0.4803, lr_0 = 9.0225e-04
Loss = 9.2048e-02, PNorm = 100.8146, GNorm = 0.6535, lr_0 = 9.0163e-04
Loss = 7.5049e-02, PNorm = 100.8978, GNorm = 0.6506, lr_0 = 9.0102e-04
Loss = 8.8353e-02, PNorm = 100.9825, GNorm = 0.4188, lr_0 = 9.0040e-04
Loss = 8.1260e-02, PNorm = 101.0704, GNorm = 0.5943, lr_0 = 8.9978e-04
Loss = 8.4852e-02, PNorm = 101.1589, GNorm = 0.5879, lr_0 = 8.9916e-04
Loss = 6.8567e-02, PNorm = 101.2413, GNorm = 1.0060, lr_0 = 8.9855e-04
Loss = 7.8061e-02, PNorm = 101.3138, GNorm = 0.5186, lr_0 = 8.9793e-04
Loss = 7.9997e-02, PNorm = 101.3994, GNorm = 0.7615, lr_0 = 8.9732e-04
Loss = 8.9919e-02, PNorm = 101.4699, GNorm = 0.4752, lr_0 = 8.9670e-04
Loss = 8.2148e-02, PNorm = 101.5577, GNorm = 0.5735, lr_0 = 8.9609e-04
Loss = 7.5513e-02, PNorm = 101.6312, GNorm = 0.6553, lr_0 = 8.9548e-04
Loss = 8.0897e-02, PNorm = 101.7152, GNorm = 0.5826, lr_0 = 8.9486e-04
Loss = 7.4361e-02, PNorm = 101.8050, GNorm = 1.0327, lr_0 = 8.9425e-04
Loss = 9.0295e-02, PNorm = 101.8859, GNorm = 1.2530, lr_0 = 8.9364e-04
Loss = 8.8191e-02, PNorm = 101.9769, GNorm = 0.5957, lr_0 = 8.9302e-04
Loss = 7.6208e-02, PNorm = 102.0681, GNorm = 0.5145, lr_0 = 8.9241e-04
Loss = 8.3222e-02, PNorm = 102.1597, GNorm = 0.7464, lr_0 = 8.9180e-04
Loss = 8.6178e-02, PNorm = 102.2487, GNorm = 0.7272, lr_0 = 8.9119e-04
Loss = 7.9205e-02, PNorm = 102.3421, GNorm = 0.4736, lr_0 = 8.9058e-04
Loss = 8.7107e-02, PNorm = 102.4297, GNorm = 0.7984, lr_0 = 8.8997e-04
Loss = 9.5228e-02, PNorm = 102.5300, GNorm = 0.5486, lr_0 = 8.8936e-04
Loss = 7.8701e-02, PNorm = 102.6320, GNorm = 0.6388, lr_0 = 8.8875e-04
Loss = 7.0451e-02, PNorm = 102.7259, GNorm = 0.3796, lr_0 = 8.8814e-04
Loss = 8.1447e-02, PNorm = 102.8065, GNorm = 0.5694, lr_0 = 8.8753e-04
Loss = 1.0737e-01, PNorm = 102.8965, GNorm = 0.8822, lr_0 = 8.8693e-04
Loss = 1.0131e-01, PNorm = 102.9977, GNorm = 0.3531, lr_0 = 8.8632e-04
Loss = 9.2609e-02, PNorm = 103.1049, GNorm = 0.4717, lr_0 = 8.8571e-04
Loss = 7.8242e-02, PNorm = 103.2006, GNorm = 0.7099, lr_0 = 8.8510e-04
Loss = 8.6438e-02, PNorm = 103.2965, GNorm = 0.3813, lr_0 = 8.8450e-04
Loss = 8.3886e-02, PNorm = 103.4002, GNorm = 0.5723, lr_0 = 8.8389e-04
Loss = 8.0398e-02, PNorm = 103.4923, GNorm = 0.4157, lr_0 = 8.8329e-04
Loss = 8.5375e-02, PNorm = 103.5969, GNorm = 0.9408, lr_0 = 8.8268e-04
Loss = 8.3602e-02, PNorm = 103.7073, GNorm = 0.6828, lr_0 = 8.8208e-04
Loss = 7.9784e-02, PNorm = 103.8076, GNorm = 0.7037, lr_0 = 8.8147e-04
Loss = 8.6613e-02, PNorm = 103.9140, GNorm = 0.5461, lr_0 = 8.8087e-04
Loss = 8.5988e-02, PNorm = 104.0148, GNorm = 0.4621, lr_0 = 8.8026e-04
Loss = 8.8660e-02, PNorm = 104.1175, GNorm = 0.4850, lr_0 = 8.7966e-04
Loss = 8.9368e-02, PNorm = 104.2231, GNorm = 0.8779, lr_0 = 8.7906e-04
Loss = 7.7517e-02, PNorm = 104.3209, GNorm = 0.4497, lr_0 = 8.7846e-04
Loss = 7.9880e-02, PNorm = 104.4221, GNorm = 0.4890, lr_0 = 8.7785e-04
Loss = 9.8266e-02, PNorm = 104.4993, GNorm = 0.6498, lr_0 = 8.7725e-04
Loss = 9.8627e-02, PNorm = 104.6045, GNorm = 0.7178, lr_0 = 8.7665e-04
Loss = 9.0413e-02, PNorm = 104.7105, GNorm = 0.4066, lr_0 = 8.7605e-04
Loss = 8.9574e-02, PNorm = 104.8166, GNorm = 0.7398, lr_0 = 8.7545e-04
Loss = 8.3775e-02, PNorm = 104.9263, GNorm = 0.6731, lr_0 = 8.7485e-04
Loss = 9.3402e-02, PNorm = 105.0227, GNorm = 0.6878, lr_0 = 8.7425e-04
Loss = 9.3304e-02, PNorm = 105.1325, GNorm = 1.2309, lr_0 = 8.7365e-04
Loss = 9.5490e-02, PNorm = 105.2334, GNorm = 0.7872, lr_0 = 8.7306e-04
Loss = 9.6838e-02, PNorm = 105.3501, GNorm = 0.3831, lr_0 = 8.7246e-04
Loss = 8.4934e-02, PNorm = 105.4601, GNorm = 0.6775, lr_0 = 8.7186e-04
Loss = 8.7826e-02, PNorm = 105.5733, GNorm = 0.5633, lr_0 = 8.7126e-04
Loss = 8.1889e-02, PNorm = 105.6698, GNorm = 0.5772, lr_0 = 8.7067e-04
Loss = 8.7773e-02, PNorm = 105.7735, GNorm = 0.4643, lr_0 = 8.7007e-04
Loss = 9.0924e-02, PNorm = 105.8689, GNorm = 0.7385, lr_0 = 8.6947e-04
Loss = 8.7975e-02, PNorm = 105.9696, GNorm = 0.7050, lr_0 = 8.6888e-04
Loss = 9.1137e-02, PNorm = 106.0741, GNorm = 0.7109, lr_0 = 8.6828e-04
Loss = 9.4016e-02, PNorm = 106.1785, GNorm = 0.8748, lr_0 = 8.6769e-04
Loss = 8.8890e-02, PNorm = 106.2886, GNorm = 0.8165, lr_0 = 8.6709e-04
Loss = 8.8406e-02, PNorm = 106.3916, GNorm = 1.6168, lr_0 = 8.6650e-04
Loss = 8.6757e-02, PNorm = 106.4955, GNorm = 0.4477, lr_0 = 8.6590e-04
Loss = 8.9141e-02, PNorm = 106.5917, GNorm = 0.7069, lr_0 = 8.6531e-04
Loss = 8.9855e-02, PNorm = 106.7054, GNorm = 0.8874, lr_0 = 8.6472e-04
Loss = 1.0166e-01, PNorm = 106.7988, GNorm = 0.6976, lr_0 = 8.6413e-04
Loss = 9.2158e-02, PNorm = 106.9145, GNorm = 0.8634, lr_0 = 8.6353e-04
Loss = 9.5891e-02, PNorm = 107.0190, GNorm = 0.8144, lr_0 = 8.6294e-04
Loss = 9.1585e-02, PNorm = 107.1345, GNorm = 0.4184, lr_0 = 8.6235e-04
Loss = 1.0161e-01, PNorm = 107.2451, GNorm = 0.4057, lr_0 = 8.6176e-04
Loss = 9.8985e-02, PNorm = 107.3605, GNorm = 0.4210, lr_0 = 8.6117e-04
Loss = 8.9609e-02, PNorm = 107.4753, GNorm = 0.6051, lr_0 = 8.6058e-04
Loss = 9.4816e-02, PNorm = 107.5867, GNorm = 0.4928, lr_0 = 8.5999e-04
Loss = 9.1282e-02, PNorm = 107.6976, GNorm = 0.3848, lr_0 = 8.5940e-04
Loss = 9.8838e-02, PNorm = 107.7998, GNorm = 0.6827, lr_0 = 8.5881e-04
Loss = 9.2318e-02, PNorm = 107.9077, GNorm = 0.6302, lr_0 = 8.5823e-04
Loss = 9.9187e-02, PNorm = 108.0146, GNorm = 1.0263, lr_0 = 8.5764e-04
Loss = 1.0413e-01, PNorm = 108.1354, GNorm = 0.6080, lr_0 = 8.5705e-04
Loss = 9.7894e-02, PNorm = 108.2657, GNorm = 0.8452, lr_0 = 8.5646e-04
Loss = 8.4821e-02, PNorm = 108.3875, GNorm = 0.4328, lr_0 = 8.5588e-04
Loss = 9.6234e-02, PNorm = 108.5150, GNorm = 0.4544, lr_0 = 8.5529e-04
Loss = 9.8958e-02, PNorm = 108.6311, GNorm = 0.4082, lr_0 = 8.5470e-04
Loss = 1.0223e-01, PNorm = 108.7556, GNorm = 0.6008, lr_0 = 8.5412e-04
Loss = 1.0661e-01, PNorm = 108.8685, GNorm = 1.0579, lr_0 = 8.5353e-04
Loss = 1.0110e-01, PNorm = 109.0007, GNorm = 0.5033, lr_0 = 8.5295e-04
Loss = 9.0504e-02, PNorm = 109.1165, GNorm = 0.4204, lr_0 = 8.5236e-04
Loss = 1.0522e-01, PNorm = 109.2314, GNorm = 0.6845, lr_0 = 8.5178e-04
Loss = 1.0524e-01, PNorm = 109.3444, GNorm = 0.9501, lr_0 = 8.5120e-04
Loss = 1.0146e-01, PNorm = 109.4589, GNorm = 0.7921, lr_0 = 8.5061e-04
Loss = 9.7904e-02, PNorm = 109.5771, GNorm = 0.8980, lr_0 = 8.5003e-04
Loss = 1.0583e-01, PNorm = 109.6894, GNorm = 0.4034, lr_0 = 8.4945e-04
Loss = 1.0461e-01, PNorm = 109.8124, GNorm = 0.6856, lr_0 = 8.4887e-04
Loss = 8.4546e-02, PNorm = 109.9275, GNorm = 0.4840, lr_0 = 8.4828e-04
Validation mae = 0.294267
Epoch 4
Loss = 7.3428e-02, PNorm = 110.0139, GNorm = 1.0316, lr_0 = 8.4770e-04
Loss = 7.5507e-02, PNorm = 110.1151, GNorm = 0.3720, lr_0 = 8.4712e-04
Loss = 6.6338e-02, PNorm = 110.1896, GNorm = 0.5707, lr_0 = 8.4654e-04
Loss = 7.3201e-02, PNorm = 110.2672, GNorm = 0.5697, lr_0 = 8.4596e-04
Loss = 5.8505e-02, PNorm = 110.3381, GNorm = 0.4748, lr_0 = 8.4538e-04
Loss = 5.9888e-02, PNorm = 110.4095, GNorm = 0.8568, lr_0 = 8.4480e-04
Loss = 5.4201e-02, PNorm = 110.4734, GNorm = 0.4527, lr_0 = 8.4423e-04
Loss = 5.4391e-02, PNorm = 110.5404, GNorm = 0.5764, lr_0 = 8.4365e-04
Loss = 5.4783e-02, PNorm = 110.6037, GNorm = 0.4847, lr_0 = 8.4307e-04
Loss = 4.7734e-02, PNorm = 110.6666, GNorm = 0.4591, lr_0 = 8.4249e-04
Loss = 5.3694e-02, PNorm = 110.7277, GNorm = 0.4708, lr_0 = 8.4191e-04
Loss = 5.2345e-02, PNorm = 110.7922, GNorm = 0.3019, lr_0 = 8.4134e-04
Loss = 5.6395e-02, PNorm = 110.8477, GNorm = 0.2642, lr_0 = 8.4076e-04
Loss = 5.7732e-02, PNorm = 110.9162, GNorm = 0.6054, lr_0 = 8.4019e-04
Loss = 5.7129e-02, PNorm = 110.9834, GNorm = 1.1839, lr_0 = 8.3961e-04
Loss = 5.0014e-02, PNorm = 111.0432, GNorm = 0.5532, lr_0 = 8.3903e-04
Loss = 4.7208e-02, PNorm = 111.1118, GNorm = 1.0879, lr_0 = 8.3846e-04
Loss = 4.9598e-02, PNorm = 111.1646, GNorm = 0.5195, lr_0 = 8.3789e-04
Loss = 5.4722e-02, PNorm = 111.2378, GNorm = 0.7826, lr_0 = 8.3731e-04
Loss = 5.0934e-02, PNorm = 111.2959, GNorm = 0.5730, lr_0 = 8.3674e-04
Loss = 6.1863e-02, PNorm = 111.3541, GNorm = 0.3554, lr_0 = 8.3616e-04
Loss = 5.7478e-02, PNorm = 111.4160, GNorm = 0.7713, lr_0 = 8.3559e-04
Loss = 4.9611e-02, PNorm = 111.4884, GNorm = 0.6527, lr_0 = 8.3502e-04
Loss = 4.9722e-02, PNorm = 111.5478, GNorm = 0.3095, lr_0 = 8.3445e-04
Loss = 5.5511e-02, PNorm = 111.6145, GNorm = 0.2869, lr_0 = 8.3388e-04
Loss = 5.3540e-02, PNorm = 111.6730, GNorm = 0.9779, lr_0 = 8.3330e-04
Loss = 6.0036e-02, PNorm = 111.7394, GNorm = 0.6391, lr_0 = 8.3273e-04
Loss = 6.1756e-02, PNorm = 111.8085, GNorm = 0.4958, lr_0 = 8.3216e-04
Loss = 6.3372e-02, PNorm = 111.8848, GNorm = 0.7209, lr_0 = 8.3159e-04
Loss = 5.5830e-02, PNorm = 111.9592, GNorm = 0.3009, lr_0 = 8.3102e-04
Loss = 6.1540e-02, PNorm = 112.0307, GNorm = 0.5016, lr_0 = 8.3045e-04
Loss = 5.6180e-02, PNorm = 112.1096, GNorm = 1.1435, lr_0 = 8.2988e-04
Loss = 4.9744e-02, PNorm = 112.1816, GNorm = 0.5443, lr_0 = 8.2932e-04
Loss = 4.9356e-02, PNorm = 112.2464, GNorm = 0.2499, lr_0 = 8.2875e-04
Loss = 5.5101e-02, PNorm = 112.3100, GNorm = 0.5679, lr_0 = 8.2818e-04
Loss = 5.8126e-02, PNorm = 112.3776, GNorm = 0.3370, lr_0 = 8.2761e-04
Loss = 5.4787e-02, PNorm = 112.4562, GNorm = 0.3403, lr_0 = 8.2705e-04
Loss = 5.8874e-02, PNorm = 112.5251, GNorm = 0.5737, lr_0 = 8.2648e-04
Loss = 5.7324e-02, PNorm = 112.5990, GNorm = 0.4058, lr_0 = 8.2591e-04
Loss = 5.7136e-02, PNorm = 112.6737, GNorm = 0.5646, lr_0 = 8.2535e-04
Loss = 6.2553e-02, PNorm = 112.7468, GNorm = 0.4721, lr_0 = 8.2478e-04
Loss = 5.9963e-02, PNorm = 112.8204, GNorm = 0.4290, lr_0 = 8.2422e-04
Loss = 5.8192e-02, PNorm = 112.8966, GNorm = 0.3311, lr_0 = 8.2365e-04
Loss = 5.8128e-02, PNorm = 112.9770, GNorm = 0.4911, lr_0 = 8.2309e-04
Loss = 6.8494e-02, PNorm = 113.0566, GNorm = 0.5727, lr_0 = 8.2252e-04
Loss = 5.7069e-02, PNorm = 113.1422, GNorm = 0.5607, lr_0 = 8.2196e-04
Loss = 5.5781e-02, PNorm = 113.2123, GNorm = 0.8065, lr_0 = 8.2140e-04
Loss = 5.2151e-02, PNorm = 113.2871, GNorm = 0.7662, lr_0 = 8.2084e-04
Loss = 5.5423e-02, PNorm = 113.3537, GNorm = 0.2976, lr_0 = 8.2027e-04
Loss = 5.2739e-02, PNorm = 113.4328, GNorm = 0.3446, lr_0 = 8.1971e-04
Loss = 5.2390e-02, PNorm = 113.5081, GNorm = 0.3680, lr_0 = 8.1915e-04
Loss = 6.0744e-02, PNorm = 113.5783, GNorm = 0.3463, lr_0 = 8.1859e-04
Loss = 5.4053e-02, PNorm = 113.6531, GNorm = 0.3225, lr_0 = 8.1803e-04
Loss = 5.7371e-02, PNorm = 113.7302, GNorm = 0.5041, lr_0 = 8.1747e-04
Loss = 4.3552e-02, PNorm = 113.7985, GNorm = 0.4110, lr_0 = 8.1691e-04
Loss = 5.6113e-02, PNorm = 113.8690, GNorm = 0.3556, lr_0 = 8.1635e-04
Loss = 5.3460e-02, PNorm = 113.9400, GNorm = 0.5046, lr_0 = 8.1579e-04
Loss = 5.7640e-02, PNorm = 114.0161, GNorm = 0.5296, lr_0 = 8.1523e-04
Loss = 5.8616e-02, PNorm = 114.0957, GNorm = 0.3593, lr_0 = 8.1467e-04
Loss = 5.8716e-02, PNorm = 114.1698, GNorm = 0.5161, lr_0 = 8.1411e-04
Loss = 5.0504e-02, PNorm = 114.2459, GNorm = 0.3742, lr_0 = 8.1355e-04
Loss = 5.8549e-02, PNorm = 114.3283, GNorm = 0.3688, lr_0 = 8.1300e-04
Loss = 6.2782e-02, PNorm = 114.4134, GNorm = 0.8296, lr_0 = 8.1244e-04
Loss = 6.6981e-02, PNorm = 114.5098, GNorm = 0.7259, lr_0 = 8.1188e-04
Loss = 6.5027e-02, PNorm = 114.6025, GNorm = 0.5640, lr_0 = 8.1133e-04
Loss = 6.7302e-02, PNorm = 114.7036, GNorm = 0.5062, lr_0 = 8.1077e-04
Loss = 6.5476e-02, PNorm = 114.7930, GNorm = 0.4449, lr_0 = 8.1022e-04
Loss = 6.3215e-02, PNorm = 114.8796, GNorm = 0.6664, lr_0 = 8.0966e-04
Loss = 6.6467e-02, PNorm = 114.9662, GNorm = 0.4789, lr_0 = 8.0911e-04
Loss = 6.3118e-02, PNorm = 115.0505, GNorm = 0.5595, lr_0 = 8.0855e-04
Loss = 5.9217e-02, PNorm = 115.1361, GNorm = 0.5739, lr_0 = 8.0800e-04
Loss = 6.2564e-02, PNorm = 115.2181, GNorm = 0.5405, lr_0 = 8.0745e-04
Loss = 6.4044e-02, PNorm = 115.2924, GNorm = 0.5804, lr_0 = 8.0689e-04
Loss = 5.6881e-02, PNorm = 115.3785, GNorm = 0.3674, lr_0 = 8.0634e-04
Loss = 4.9068e-02, PNorm = 115.4564, GNorm = 0.3054, lr_0 = 8.0579e-04
Loss = 6.0649e-02, PNorm = 115.5397, GNorm = 0.6879, lr_0 = 8.0523e-04
Loss = 6.8553e-02, PNorm = 115.6290, GNorm = 1.0690, lr_0 = 8.0468e-04
Loss = 7.0588e-02, PNorm = 115.7275, GNorm = 0.7438, lr_0 = 8.0413e-04
Loss = 5.9196e-02, PNorm = 115.8202, GNorm = 0.9122, lr_0 = 8.0358e-04
Loss = 6.5005e-02, PNorm = 115.9175, GNorm = 0.6226, lr_0 = 8.0303e-04
Loss = 6.4064e-02, PNorm = 116.0132, GNorm = 0.3433, lr_0 = 8.0248e-04
Loss = 5.5117e-02, PNorm = 116.0993, GNorm = 0.3916, lr_0 = 8.0193e-04
Loss = 5.8947e-02, PNorm = 116.1814, GNorm = 0.3708, lr_0 = 8.0138e-04
Loss = 6.1707e-02, PNorm = 116.2620, GNorm = 0.2782, lr_0 = 8.0083e-04
Loss = 6.0203e-02, PNorm = 116.3447, GNorm = 0.6226, lr_0 = 8.0028e-04
Loss = 5.7656e-02, PNorm = 116.4239, GNorm = 0.7289, lr_0 = 7.9974e-04
Loss = 6.3860e-02, PNorm = 116.5143, GNorm = 0.3366, lr_0 = 7.9919e-04
Loss = 7.3166e-02, PNorm = 116.6053, GNorm = 0.2668, lr_0 = 7.9864e-04
Loss = 6.5754e-02, PNorm = 116.7111, GNorm = 0.5743, lr_0 = 7.9809e-04
Loss = 7.3541e-02, PNorm = 116.8057, GNorm = 0.8623, lr_0 = 7.9755e-04
Loss = 6.3717e-02, PNorm = 116.9061, GNorm = 0.5823, lr_0 = 7.9700e-04
Loss = 6.9518e-02, PNorm = 117.0129, GNorm = 0.3582, lr_0 = 7.9645e-04
Loss = 6.2104e-02, PNorm = 117.1129, GNorm = 0.3761, lr_0 = 7.9591e-04
Loss = 6.4340e-02, PNorm = 117.2062, GNorm = 0.4518, lr_0 = 7.9536e-04
Loss = 5.7087e-02, PNorm = 117.3046, GNorm = 0.3430, lr_0 = 7.9482e-04
Loss = 6.0733e-02, PNorm = 117.3939, GNorm = 0.6372, lr_0 = 7.9427e-04
Loss = 6.2113e-02, PNorm = 117.4884, GNorm = 0.3563, lr_0 = 7.9373e-04
Loss = 6.0162e-02, PNorm = 117.5789, GNorm = 0.7348, lr_0 = 7.9319e-04
Loss = 6.5327e-02, PNorm = 117.6648, GNorm = 0.8500, lr_0 = 7.9264e-04
Loss = 6.7032e-02, PNorm = 117.7664, GNorm = 0.5114, lr_0 = 7.9210e-04
Loss = 6.8307e-02, PNorm = 117.8633, GNorm = 0.5686, lr_0 = 7.9156e-04
Loss = 7.0895e-02, PNorm = 117.9690, GNorm = 0.3122, lr_0 = 7.9101e-04
Loss = 6.8660e-02, PNorm = 118.0752, GNorm = 0.6094, lr_0 = 7.9047e-04
Loss = 5.4661e-02, PNorm = 118.1780, GNorm = 0.4477, lr_0 = 7.8993e-04
Loss = 6.2154e-02, PNorm = 118.2639, GNorm = 0.3076, lr_0 = 7.8939e-04
Loss = 6.1956e-02, PNorm = 118.3487, GNorm = 0.3574, lr_0 = 7.8885e-04
Loss = 6.9912e-02, PNorm = 118.4402, GNorm = 0.6045, lr_0 = 7.8831e-04
Loss = 6.0683e-02, PNorm = 118.5307, GNorm = 0.7460, lr_0 = 7.8777e-04
Loss = 6.1921e-02, PNorm = 118.6240, GNorm = 0.5797, lr_0 = 7.8723e-04
Loss = 7.9881e-02, PNorm = 118.7117, GNorm = 0.7126, lr_0 = 7.8669e-04
Loss = 6.5820e-02, PNorm = 118.8122, GNorm = 0.3911, lr_0 = 7.8615e-04
Loss = 6.7335e-02, PNorm = 118.9169, GNorm = 0.5684, lr_0 = 7.8561e-04
Loss = 6.8124e-02, PNorm = 119.0141, GNorm = 0.6122, lr_0 = 7.8507e-04
Loss = 7.7025e-02, PNorm = 119.1086, GNorm = 0.4494, lr_0 = 7.8454e-04
Loss = 7.6050e-02, PNorm = 119.2001, GNorm = 1.0063, lr_0 = 7.8400e-04
Loss = 6.6888e-02, PNorm = 119.3039, GNorm = 0.8375, lr_0 = 7.8346e-04
Loss = 6.1928e-02, PNorm = 119.3893, GNorm = 0.6291, lr_0 = 7.8293e-04
Loss = 6.8459e-02, PNorm = 119.4849, GNorm = 0.3483, lr_0 = 7.8239e-04
Loss = 7.1071e-02, PNorm = 119.5760, GNorm = 0.7005, lr_0 = 7.8185e-04
Loss = 7.8095e-02, PNorm = 119.6789, GNorm = 0.8433, lr_0 = 7.8132e-04
Validation mae = 0.291654
Epoch 5
Loss = 4.6443e-02, PNorm = 119.7634, GNorm = 0.2980, lr_0 = 7.8078e-04
Loss = 4.5257e-02, PNorm = 119.8310, GNorm = 0.4544, lr_0 = 7.8025e-04
Loss = 5.1448e-02, PNorm = 119.8891, GNorm = 0.5030, lr_0 = 7.7971e-04
Loss = 4.9163e-02, PNorm = 119.9473, GNorm = 0.5216, lr_0 = 7.7918e-04
Loss = 4.5019e-02, PNorm = 119.9992, GNorm = 0.5013, lr_0 = 7.7864e-04
Loss = 4.4621e-02, PNorm = 120.0609, GNorm = 0.8928, lr_0 = 7.7811e-04
Loss = 4.7893e-02, PNorm = 120.1189, GNorm = 0.5525, lr_0 = 7.7758e-04
Loss = 3.8935e-02, PNorm = 120.1740, GNorm = 0.3404, lr_0 = 7.7705e-04
Loss = 3.6777e-02, PNorm = 120.2262, GNorm = 0.2687, lr_0 = 7.7651e-04
Loss = 4.9583e-02, PNorm = 120.2832, GNorm = 0.7316, lr_0 = 7.7598e-04
Loss = 3.5220e-02, PNorm = 120.3441, GNorm = 0.1943, lr_0 = 7.7545e-04
Loss = 4.4914e-02, PNorm = 120.3960, GNorm = 0.6434, lr_0 = 7.7492e-04
Loss = 4.7629e-02, PNorm = 120.4581, GNorm = 0.3276, lr_0 = 7.7439e-04
Loss = 4.4151e-02, PNorm = 120.5203, GNorm = 0.3904, lr_0 = 7.7386e-04
Loss = 3.5375e-02, PNorm = 120.5832, GNorm = 0.3365, lr_0 = 7.7333e-04
Loss = 4.0681e-02, PNorm = 120.6455, GNorm = 0.4221, lr_0 = 7.7280e-04
Loss = 3.9555e-02, PNorm = 120.6993, GNorm = 0.2937, lr_0 = 7.7227e-04
Loss = 4.5013e-02, PNorm = 120.7532, GNorm = 0.7802, lr_0 = 7.7174e-04
Loss = 4.3505e-02, PNorm = 120.8070, GNorm = 0.5767, lr_0 = 7.7121e-04
Loss = 4.1624e-02, PNorm = 120.8654, GNorm = 0.6045, lr_0 = 7.7068e-04
Loss = 4.3786e-02, PNorm = 120.9250, GNorm = 0.4899, lr_0 = 7.7015e-04
Loss = 3.8658e-02, PNorm = 120.9849, GNorm = 0.8444, lr_0 = 7.6963e-04
Loss = 4.0743e-02, PNorm = 121.0529, GNorm = 0.5311, lr_0 = 7.6910e-04
Loss = 3.8709e-02, PNorm = 121.1085, GNorm = 0.5512, lr_0 = 7.6857e-04
Loss = 4.9499e-02, PNorm = 121.1665, GNorm = 0.7549, lr_0 = 7.6805e-04
Loss = 3.7652e-02, PNorm = 121.2201, GNorm = 0.4851, lr_0 = 7.6752e-04
Loss = 4.0793e-02, PNorm = 121.2821, GNorm = 0.7266, lr_0 = 7.6699e-04
Loss = 4.4945e-02, PNorm = 121.3352, GNorm = 0.5155, lr_0 = 7.6647e-04
Loss = 4.1449e-02, PNorm = 121.3993, GNorm = 0.7536, lr_0 = 7.6594e-04
Loss = 3.8817e-02, PNorm = 121.4484, GNorm = 0.3305, lr_0 = 7.6542e-04
Loss = 4.0617e-02, PNorm = 121.5103, GNorm = 0.4098, lr_0 = 7.6489e-04
Loss = 5.2239e-02, PNorm = 121.5673, GNorm = 0.5423, lr_0 = 7.6437e-04
Loss = 4.4177e-02, PNorm = 121.6422, GNorm = 0.4742, lr_0 = 7.6385e-04
Loss = 3.8998e-02, PNorm = 121.7041, GNorm = 0.2978, lr_0 = 7.6332e-04
Loss = 3.9700e-02, PNorm = 121.7676, GNorm = 0.3888, lr_0 = 7.6280e-04
Loss = 4.3082e-02, PNorm = 121.8338, GNorm = 0.3538, lr_0 = 7.6228e-04
Loss = 4.3387e-02, PNorm = 121.9010, GNorm = 0.5879, lr_0 = 7.6176e-04
Loss = 3.7692e-02, PNorm = 121.9661, GNorm = 0.2594, lr_0 = 7.6123e-04
Loss = 4.4182e-02, PNorm = 122.0358, GNorm = 0.4828, lr_0 = 7.6071e-04
Loss = 4.3379e-02, PNorm = 122.0972, GNorm = 0.8411, lr_0 = 7.6019e-04
Loss = 3.5216e-02, PNorm = 122.1731, GNorm = 0.3444, lr_0 = 7.5967e-04
Loss = 4.7145e-02, PNorm = 122.2286, GNorm = 0.7808, lr_0 = 7.5915e-04
Loss = 4.4590e-02, PNorm = 122.2962, GNorm = 0.4180, lr_0 = 7.5863e-04
Loss = 3.7616e-02, PNorm = 122.3617, GNorm = 0.2718, lr_0 = 7.5811e-04
Loss = 4.3306e-02, PNorm = 122.4346, GNorm = 0.6581, lr_0 = 7.5759e-04
Loss = 4.9812e-02, PNorm = 122.5151, GNorm = 0.6773, lr_0 = 7.5707e-04
Loss = 3.8850e-02, PNorm = 122.5881, GNorm = 0.2967, lr_0 = 7.5655e-04
Loss = 4.2955e-02, PNorm = 122.6495, GNorm = 0.5643, lr_0 = 7.5603e-04
Loss = 3.6325e-02, PNorm = 122.7158, GNorm = 0.4361, lr_0 = 7.5552e-04
Loss = 4.1619e-02, PNorm = 122.7760, GNorm = 0.6496, lr_0 = 7.5500e-04
Loss = 4.2410e-02, PNorm = 122.8425, GNorm = 0.4665, lr_0 = 7.5448e-04
Loss = 3.6893e-02, PNorm = 122.9107, GNorm = 0.7505, lr_0 = 7.5397e-04
Loss = 3.7870e-02, PNorm = 122.9793, GNorm = 0.6966, lr_0 = 7.5345e-04
Loss = 3.6360e-02, PNorm = 123.0423, GNorm = 0.4374, lr_0 = 7.5293e-04
Loss = 4.3308e-02, PNorm = 123.1027, GNorm = 0.3241, lr_0 = 7.5242e-04
Loss = 4.3266e-02, PNorm = 123.1614, GNorm = 0.3338, lr_0 = 7.5190e-04
Loss = 4.6050e-02, PNorm = 123.2228, GNorm = 0.3439, lr_0 = 7.5139e-04
Loss = 4.1211e-02, PNorm = 123.2934, GNorm = 0.2780, lr_0 = 7.5087e-04
Loss = 4.3065e-02, PNorm = 123.3565, GNorm = 0.7929, lr_0 = 7.5036e-04
Loss = 4.2879e-02, PNorm = 123.4318, GNorm = 0.6935, lr_0 = 7.4984e-04
Loss = 5.2919e-02, PNorm = 123.4922, GNorm = 0.8789, lr_0 = 7.4933e-04
Loss = 4.0532e-02, PNorm = 123.5721, GNorm = 0.5810, lr_0 = 7.4882e-04
Loss = 4.5521e-02, PNorm = 123.6434, GNorm = 0.4112, lr_0 = 7.4830e-04
Loss = 4.1728e-02, PNorm = 123.7194, GNorm = 0.3604, lr_0 = 7.4779e-04
Loss = 4.1486e-02, PNorm = 123.7925, GNorm = 0.6700, lr_0 = 7.4728e-04
Loss = 4.4658e-02, PNorm = 123.8673, GNorm = 0.4833, lr_0 = 7.4677e-04
Loss = 4.5560e-02, PNorm = 123.9370, GNorm = 0.9221, lr_0 = 7.4625e-04
Loss = 4.2219e-02, PNorm = 124.0074, GNorm = 0.4415, lr_0 = 7.4574e-04
Loss = 4.1494e-02, PNorm = 124.0779, GNorm = 0.3988, lr_0 = 7.4523e-04
Loss = 4.2580e-02, PNorm = 124.1457, GNorm = 0.3018, lr_0 = 7.4472e-04
Loss = 4.1747e-02, PNorm = 124.2216, GNorm = 0.3857, lr_0 = 7.4421e-04
Loss = 4.4742e-02, PNorm = 124.2960, GNorm = 0.2381, lr_0 = 7.4370e-04
Loss = 4.2566e-02, PNorm = 124.3733, GNorm = 0.5259, lr_0 = 7.4319e-04
Loss = 4.0048e-02, PNorm = 124.4469, GNorm = 0.5018, lr_0 = 7.4268e-04
Loss = 5.0041e-02, PNorm = 124.5111, GNorm = 0.4866, lr_0 = 7.4217e-04
Loss = 3.7486e-02, PNorm = 124.5861, GNorm = 0.4455, lr_0 = 7.4167e-04
Loss = 4.2081e-02, PNorm = 124.6565, GNorm = 0.6589, lr_0 = 7.4116e-04
Loss = 4.4552e-02, PNorm = 124.7376, GNorm = 0.4550, lr_0 = 7.4065e-04
Loss = 4.7580e-02, PNorm = 124.8197, GNorm = 0.4916, lr_0 = 7.4014e-04
Loss = 4.2245e-02, PNorm = 124.9027, GNorm = 0.4039, lr_0 = 7.3964e-04
Loss = 3.9828e-02, PNorm = 124.9759, GNorm = 0.4542, lr_0 = 7.3913e-04
Loss = 4.8779e-02, PNorm = 125.0514, GNorm = 0.6073, lr_0 = 7.3862e-04
Loss = 4.5203e-02, PNorm = 125.1198, GNorm = 0.4623, lr_0 = 7.3812e-04
Loss = 4.7076e-02, PNorm = 125.1959, GNorm = 0.7484, lr_0 = 7.3761e-04
Loss = 4.5088e-02, PNorm = 125.2662, GNorm = 0.5853, lr_0 = 7.3711e-04
Loss = 4.2954e-02, PNorm = 125.3414, GNorm = 0.2897, lr_0 = 7.3660e-04
Loss = 5.0971e-02, PNorm = 125.4182, GNorm = 0.2578, lr_0 = 7.3610e-04
Loss = 4.4176e-02, PNorm = 125.4923, GNorm = 0.4238, lr_0 = 7.3559e-04
Loss = 4.4867e-02, PNorm = 125.5750, GNorm = 0.4307, lr_0 = 7.3509e-04
Loss = 4.6556e-02, PNorm = 125.6573, GNorm = 0.2691, lr_0 = 7.3458e-04
Loss = 4.3535e-02, PNorm = 125.7397, GNorm = 0.3357, lr_0 = 7.3408e-04
Loss = 4.0873e-02, PNorm = 125.8078, GNorm = 0.3839, lr_0 = 7.3358e-04
Loss = 4.1121e-02, PNorm = 125.8755, GNorm = 0.4616, lr_0 = 7.3308e-04
Loss = 4.8321e-02, PNorm = 125.9510, GNorm = 0.3954, lr_0 = 7.3257e-04
Loss = 4.0112e-02, PNorm = 126.0313, GNorm = 0.3164, lr_0 = 7.3207e-04
Loss = 4.7629e-02, PNorm = 126.1018, GNorm = 0.7748, lr_0 = 7.3157e-04
Loss = 4.4428e-02, PNorm = 126.1792, GNorm = 0.4932, lr_0 = 7.3107e-04
Loss = 4.7619e-02, PNorm = 126.2533, GNorm = 0.7838, lr_0 = 7.3057e-04
Loss = 5.0311e-02, PNorm = 126.3343, GNorm = 0.8437, lr_0 = 7.3007e-04
Loss = 3.9280e-02, PNorm = 126.4198, GNorm = 0.7818, lr_0 = 7.2957e-04
Loss = 4.3060e-02, PNorm = 126.5029, GNorm = 0.3865, lr_0 = 7.2907e-04
Loss = 4.3989e-02, PNorm = 126.5812, GNorm = 0.5085, lr_0 = 7.2857e-04
Loss = 5.6746e-02, PNorm = 126.6615, GNorm = 0.3904, lr_0 = 7.2807e-04
Loss = 4.4379e-02, PNorm = 126.7467, GNorm = 0.6748, lr_0 = 7.2757e-04
Loss = 4.7629e-02, PNorm = 126.8258, GNorm = 0.4945, lr_0 = 7.2707e-04
Loss = 4.2236e-02, PNorm = 126.9073, GNorm = 0.4557, lr_0 = 7.2657e-04
Loss = 4.4988e-02, PNorm = 126.9900, GNorm = 0.7727, lr_0 = 7.2608e-04
Loss = 4.4380e-02, PNorm = 127.0701, GNorm = 0.3115, lr_0 = 7.2558e-04
Loss = 5.0717e-02, PNorm = 127.1563, GNorm = 1.1011, lr_0 = 7.2508e-04
Loss = 5.5276e-02, PNorm = 127.2351, GNorm = 0.3042, lr_0 = 7.2458e-04
Loss = 5.1315e-02, PNorm = 127.3279, GNorm = 1.1717, lr_0 = 7.2409e-04
Loss = 5.7016e-02, PNorm = 127.4155, GNorm = 0.4558, lr_0 = 7.2359e-04
Loss = 5.7718e-02, PNorm = 127.5090, GNorm = 0.3432, lr_0 = 7.2310e-04
Loss = 4.7701e-02, PNorm = 127.5992, GNorm = 0.4842, lr_0 = 7.2260e-04
Loss = 4.9054e-02, PNorm = 127.6838, GNorm = 0.3334, lr_0 = 7.2211e-04
Loss = 4.6235e-02, PNorm = 127.7716, GNorm = 0.2654, lr_0 = 7.2161e-04
Loss = 4.6684e-02, PNorm = 127.8484, GNorm = 0.6257, lr_0 = 7.2112e-04
Loss = 5.0145e-02, PNorm = 127.9365, GNorm = 0.5526, lr_0 = 7.2062e-04
Loss = 4.8602e-02, PNorm = 128.0248, GNorm = 0.6125, lr_0 = 7.2013e-04
Loss = 5.3386e-02, PNorm = 128.1034, GNorm = 0.4932, lr_0 = 7.1964e-04
Validation mae = 0.290752
Epoch 6
Loss = 3.7296e-02, PNorm = 128.1747, GNorm = 0.3808, lr_0 = 7.1914e-04
Loss = 3.3722e-02, PNorm = 128.2383, GNorm = 0.2798, lr_0 = 7.1865e-04
Loss = 3.8578e-02, PNorm = 128.2971, GNorm = 0.2775, lr_0 = 7.1816e-04
Loss = 3.0330e-02, PNorm = 128.3469, GNorm = 0.7643, lr_0 = 7.1767e-04
Loss = 3.7249e-02, PNorm = 128.4026, GNorm = 0.5362, lr_0 = 7.1717e-04
Loss = 2.9836e-02, PNorm = 128.4502, GNorm = 0.1977, lr_0 = 7.1668e-04
Loss = 3.4657e-02, PNorm = 128.5064, GNorm = 0.5049, lr_0 = 7.1619e-04
Loss = 3.3024e-02, PNorm = 128.5621, GNorm = 0.2213, lr_0 = 7.1570e-04
Loss = 3.2276e-02, PNorm = 128.6189, GNorm = 0.2578, lr_0 = 7.1521e-04
Loss = 3.2664e-02, PNorm = 128.6697, GNorm = 0.7365, lr_0 = 7.1472e-04
Loss = 3.3317e-02, PNorm = 128.7199, GNorm = 0.3550, lr_0 = 7.1423e-04
Loss = 3.2121e-02, PNorm = 128.7681, GNorm = 1.0492, lr_0 = 7.1374e-04
Loss = 3.5014e-02, PNorm = 128.8193, GNorm = 0.4168, lr_0 = 7.1325e-04
Loss = 3.3820e-02, PNorm = 128.8728, GNorm = 0.5290, lr_0 = 7.1277e-04
Loss = 2.6563e-02, PNorm = 128.9307, GNorm = 0.3867, lr_0 = 7.1228e-04
Loss = 3.2220e-02, PNorm = 128.9798, GNorm = 0.4059, lr_0 = 7.1179e-04
Loss = 3.5698e-02, PNorm = 129.0326, GNorm = 0.3531, lr_0 = 7.1130e-04
Loss = 3.2951e-02, PNorm = 129.0877, GNorm = 0.2630, lr_0 = 7.1081e-04
Loss = 2.9519e-02, PNorm = 129.1449, GNorm = 0.3700, lr_0 = 7.1033e-04
Loss = 3.4563e-02, PNorm = 129.2052, GNorm = 0.7364, lr_0 = 7.0984e-04
Loss = 3.0333e-02, PNorm = 129.2517, GNorm = 0.4789, lr_0 = 7.0935e-04
Loss = 3.0121e-02, PNorm = 129.3021, GNorm = 0.3089, lr_0 = 7.0887e-04
Loss = 3.5414e-02, PNorm = 129.3583, GNorm = 0.6419, lr_0 = 7.0838e-04
Loss = 3.0410e-02, PNorm = 129.4118, GNorm = 0.4580, lr_0 = 7.0790e-04
Loss = 3.4325e-02, PNorm = 129.4673, GNorm = 0.6862, lr_0 = 7.0741e-04
Loss = 3.2160e-02, PNorm = 129.5249, GNorm = 0.4788, lr_0 = 7.0693e-04
Loss = 3.3120e-02, PNorm = 129.5800, GNorm = 0.3865, lr_0 = 7.0644e-04
Loss = 2.8220e-02, PNorm = 129.6327, GNorm = 0.4431, lr_0 = 7.0596e-04
Loss = 3.2214e-02, PNorm = 129.6842, GNorm = 0.2403, lr_0 = 7.0548e-04
Loss = 2.8907e-02, PNorm = 129.7349, GNorm = 0.2611, lr_0 = 7.0499e-04
Loss = 3.7376e-02, PNorm = 129.7834, GNorm = 0.3213, lr_0 = 7.0451e-04
Loss = 3.0419e-02, PNorm = 129.8423, GNorm = 0.3970, lr_0 = 7.0403e-04
Loss = 2.6848e-02, PNorm = 129.8908, GNorm = 0.5642, lr_0 = 7.0354e-04
Loss = 2.8935e-02, PNorm = 129.9377, GNorm = 0.3618, lr_0 = 7.0306e-04
Loss = 3.4288e-02, PNorm = 129.9939, GNorm = 0.2620, lr_0 = 7.0258e-04
Loss = 3.3881e-02, PNorm = 130.0445, GNorm = 0.2741, lr_0 = 7.0210e-04
Loss = 3.1084e-02, PNorm = 130.1032, GNorm = 0.2120, lr_0 = 7.0162e-04
Loss = 2.8503e-02, PNorm = 130.1556, GNorm = 0.3963, lr_0 = 7.0114e-04
Loss = 2.8248e-02, PNorm = 130.2067, GNorm = 0.4811, lr_0 = 7.0066e-04
Loss = 2.4227e-02, PNorm = 130.2570, GNorm = 0.1504, lr_0 = 7.0018e-04
Loss = 3.7200e-02, PNorm = 130.3028, GNorm = 0.2530, lr_0 = 6.9970e-04
Loss = 3.1613e-02, PNorm = 130.3620, GNorm = 0.8384, lr_0 = 6.9922e-04
Loss = 3.3512e-02, PNorm = 130.4248, GNorm = 0.4676, lr_0 = 6.9874e-04
Loss = 3.1701e-02, PNorm = 130.4888, GNorm = 0.3642, lr_0 = 6.9826e-04
Loss = 3.4958e-02, PNorm = 130.5455, GNorm = 0.5123, lr_0 = 6.9778e-04
Loss = 3.7440e-02, PNorm = 130.6027, GNorm = 0.3368, lr_0 = 6.9730e-04
Loss = 3.0566e-02, PNorm = 130.6675, GNorm = 0.5385, lr_0 = 6.9683e-04
Loss = 3.2125e-02, PNorm = 130.7237, GNorm = 0.5014, lr_0 = 6.9635e-04
Loss = 3.3989e-02, PNorm = 130.7835, GNorm = 0.3443, lr_0 = 6.9587e-04
Loss = 3.9304e-02, PNorm = 130.8411, GNorm = 0.6183, lr_0 = 6.9540e-04
Loss = 2.9580e-02, PNorm = 130.8975, GNorm = 0.3788, lr_0 = 6.9492e-04
Loss = 3.0867e-02, PNorm = 130.9553, GNorm = 0.4733, lr_0 = 6.9444e-04
Loss = 3.2875e-02, PNorm = 131.0118, GNorm = 0.4214, lr_0 = 6.9397e-04
Loss = 3.5274e-02, PNorm = 131.0646, GNorm = 0.6314, lr_0 = 6.9349e-04
Loss = 2.9686e-02, PNorm = 131.1136, GNorm = 0.3055, lr_0 = 6.9302e-04
Loss = 3.1992e-02, PNorm = 131.1695, GNorm = 0.2530, lr_0 = 6.9254e-04
Loss = 3.1874e-02, PNorm = 131.2229, GNorm = 0.2089, lr_0 = 6.9207e-04
Loss = 3.2998e-02, PNorm = 131.2748, GNorm = 0.8042, lr_0 = 6.9159e-04
Loss = 3.6710e-02, PNorm = 131.3338, GNorm = 0.5105, lr_0 = 6.9112e-04
Loss = 3.8848e-02, PNorm = 131.3937, GNorm = 0.3099, lr_0 = 6.9065e-04
Loss = 3.2018e-02, PNorm = 131.4594, GNorm = 0.4094, lr_0 = 6.9017e-04
Loss = 3.2578e-02, PNorm = 131.5235, GNorm = 0.4869, lr_0 = 6.8970e-04
Loss = 2.8722e-02, PNorm = 131.5809, GNorm = 0.3560, lr_0 = 6.8923e-04
Loss = 3.3455e-02, PNorm = 131.6338, GNorm = 0.4107, lr_0 = 6.8876e-04
Loss = 3.0847e-02, PNorm = 131.6854, GNorm = 0.2269, lr_0 = 6.8828e-04
Loss = 3.6792e-02, PNorm = 131.7443, GNorm = 0.3223, lr_0 = 6.8781e-04
Loss = 4.0795e-02, PNorm = 131.8071, GNorm = 0.2918, lr_0 = 6.8734e-04
Loss = 3.0435e-02, PNorm = 131.8774, GNorm = 0.3914, lr_0 = 6.8687e-04
Loss = 3.8548e-02, PNorm = 131.9482, GNorm = 0.4372, lr_0 = 6.8640e-04
Loss = 3.7307e-02, PNorm = 132.0197, GNorm = 0.4617, lr_0 = 6.8593e-04
Loss = 3.3649e-02, PNorm = 132.0840, GNorm = 0.7050, lr_0 = 6.8546e-04
Loss = 4.1009e-02, PNorm = 132.1498, GNorm = 0.3653, lr_0 = 6.8499e-04
Loss = 3.0404e-02, PNorm = 132.2130, GNorm = 0.6845, lr_0 = 6.8452e-04
Loss = 3.4416e-02, PNorm = 132.2700, GNorm = 0.4909, lr_0 = 6.8405e-04
Loss = 3.4417e-02, PNorm = 132.3331, GNorm = 0.3788, lr_0 = 6.8358e-04
Loss = 3.2058e-02, PNorm = 132.3996, GNorm = 0.2861, lr_0 = 6.8312e-04
Loss = 3.2283e-02, PNorm = 132.4584, GNorm = 0.4050, lr_0 = 6.8265e-04
Loss = 3.3395e-02, PNorm = 132.5234, GNorm = 0.2088, lr_0 = 6.8218e-04
Loss = 3.1005e-02, PNorm = 132.5902, GNorm = 0.3142, lr_0 = 6.8171e-04
Loss = 3.4533e-02, PNorm = 132.6532, GNorm = 0.2390, lr_0 = 6.8125e-04
Loss = 3.9504e-02, PNorm = 132.7140, GNorm = 0.3311, lr_0 = 6.8078e-04
Loss = 3.4101e-02, PNorm = 132.7781, GNorm = 0.2832, lr_0 = 6.8031e-04
Loss = 3.4695e-02, PNorm = 132.8419, GNorm = 0.4387, lr_0 = 6.7985e-04
Loss = 3.6254e-02, PNorm = 132.9124, GNorm = 0.7253, lr_0 = 6.7938e-04
Loss = 3.4760e-02, PNorm = 132.9718, GNorm = 0.2125, lr_0 = 6.7892e-04
Loss = 3.3341e-02, PNorm = 133.0380, GNorm = 0.3392, lr_0 = 6.7845e-04
Loss = 3.2650e-02, PNorm = 133.1075, GNorm = 0.3052, lr_0 = 6.7799e-04
Loss = 3.3667e-02, PNorm = 133.1831, GNorm = 0.1847, lr_0 = 6.7752e-04
Loss = 3.8496e-02, PNorm = 133.2590, GNorm = 0.4367, lr_0 = 6.7706e-04
Loss = 3.3890e-02, PNorm = 133.3218, GNorm = 0.4367, lr_0 = 6.7659e-04
Loss = 3.6389e-02, PNorm = 133.3927, GNorm = 0.4517, lr_0 = 6.7613e-04
Loss = 3.5246e-02, PNorm = 133.4595, GNorm = 0.7260, lr_0 = 6.7567e-04
Loss = 3.3497e-02, PNorm = 133.5276, GNorm = 0.2282, lr_0 = 6.7520e-04
Loss = 3.5376e-02, PNorm = 133.5961, GNorm = 0.7308, lr_0 = 6.7474e-04
Loss = 3.5266e-02, PNorm = 133.6639, GNorm = 0.5267, lr_0 = 6.7428e-04
Loss = 3.5957e-02, PNorm = 133.7365, GNorm = 0.4267, lr_0 = 6.7382e-04
Loss = 3.0592e-02, PNorm = 133.8031, GNorm = 0.2680, lr_0 = 6.7335e-04
Loss = 3.9138e-02, PNorm = 133.8590, GNorm = 0.3580, lr_0 = 6.7289e-04
Loss = 4.1829e-02, PNorm = 133.9252, GNorm = 0.4436, lr_0 = 6.7243e-04
Loss = 3.4748e-02, PNorm = 133.9932, GNorm = 0.9222, lr_0 = 6.7197e-04
Loss = 3.2325e-02, PNorm = 134.0686, GNorm = 0.2136, lr_0 = 6.7151e-04
Loss = 4.5944e-02, PNorm = 134.1401, GNorm = 0.6729, lr_0 = 6.7105e-04
Loss = 3.6081e-02, PNorm = 134.2162, GNorm = 0.3173, lr_0 = 6.7059e-04
Loss = 3.7514e-02, PNorm = 134.2927, GNorm = 0.3493, lr_0 = 6.7013e-04
Loss = 3.5959e-02, PNorm = 134.3635, GNorm = 0.2641, lr_0 = 6.6967e-04
Loss = 3.4024e-02, PNorm = 134.4361, GNorm = 0.3635, lr_0 = 6.6921e-04
Loss = 3.8986e-02, PNorm = 134.5041, GNorm = 0.9221, lr_0 = 6.6876e-04
Loss = 3.9138e-02, PNorm = 134.5812, GNorm = 1.0246, lr_0 = 6.6830e-04
Loss = 3.9257e-02, PNorm = 134.6629, GNorm = 0.4362, lr_0 = 6.6784e-04
Loss = 4.3078e-02, PNorm = 134.7367, GNorm = 1.0836, lr_0 = 6.6738e-04
Loss = 4.2265e-02, PNorm = 134.8196, GNorm = 0.7999, lr_0 = 6.6693e-04
Loss = 4.1494e-02, PNorm = 134.8878, GNorm = 0.2671, lr_0 = 6.6647e-04
Loss = 3.7502e-02, PNorm = 134.9616, GNorm = 0.3078, lr_0 = 6.6601e-04
Loss = 3.9128e-02, PNorm = 135.0327, GNorm = 0.4064, lr_0 = 6.6556e-04
Loss = 3.6456e-02, PNorm = 135.1060, GNorm = 0.8923, lr_0 = 6.6510e-04
Loss = 3.5783e-02, PNorm = 135.1694, GNorm = 0.5180, lr_0 = 6.6464e-04
Loss = 3.1451e-02, PNorm = 135.2423, GNorm = 0.5365, lr_0 = 6.6419e-04
Loss = 4.2764e-02, PNorm = 135.3054, GNorm = 0.4955, lr_0 = 6.6373e-04
Loss = 4.2272e-02, PNorm = 135.3819, GNorm = 0.6072, lr_0 = 6.6328e-04
Loss = 3.5611e-02, PNorm = 135.4450, GNorm = 0.4735, lr_0 = 6.6282e-04
Validation mae = 0.287379
Epoch 7
Loss = 2.7282e-02, PNorm = 135.5034, GNorm = 0.3039, lr_0 = 6.6237e-04
Loss = 3.5466e-02, PNorm = 135.5502, GNorm = 0.5699, lr_0 = 6.6192e-04
Loss = 3.1520e-02, PNorm = 135.5977, GNorm = 0.4958, lr_0 = 6.6146e-04
Loss = 2.9316e-02, PNorm = 135.6450, GNorm = 0.5131, lr_0 = 6.6101e-04
Loss = 3.2840e-02, PNorm = 135.6952, GNorm = 0.4681, lr_0 = 6.6056e-04
Loss = 3.1243e-02, PNorm = 135.7453, GNorm = 0.1918, lr_0 = 6.6011e-04
Loss = 2.8637e-02, PNorm = 135.7990, GNorm = 0.3858, lr_0 = 6.5965e-04
Loss = 2.8936e-02, PNorm = 135.8588, GNorm = 0.4222, lr_0 = 6.5920e-04
Loss = 2.9269e-02, PNorm = 135.9105, GNorm = 0.2874, lr_0 = 6.5875e-04
Loss = 2.6985e-02, PNorm = 135.9618, GNorm = 0.1820, lr_0 = 6.5830e-04
Loss = 2.9505e-02, PNorm = 136.0085, GNorm = 0.3377, lr_0 = 6.5785e-04
Loss = 2.7436e-02, PNorm = 136.0559, GNorm = 0.2931, lr_0 = 6.5740e-04
Loss = 2.6470e-02, PNorm = 136.1000, GNorm = 0.6968, lr_0 = 6.5695e-04
Loss = 2.5857e-02, PNorm = 136.1466, GNorm = 0.9679, lr_0 = 6.5650e-04
Loss = 2.3178e-02, PNorm = 136.1899, GNorm = 0.3249, lr_0 = 6.5605e-04
Loss = 2.6192e-02, PNorm = 136.2367, GNorm = 0.2118, lr_0 = 6.5560e-04
Loss = 2.2062e-02, PNorm = 136.2880, GNorm = 0.5149, lr_0 = 6.5515e-04
Loss = 2.7008e-02, PNorm = 136.3331, GNorm = 1.0134, lr_0 = 6.5470e-04
Loss = 2.9734e-02, PNorm = 136.3769, GNorm = 1.0351, lr_0 = 6.5425e-04
Loss = 3.1740e-02, PNorm = 136.4233, GNorm = 0.2642, lr_0 = 6.5380e-04
Loss = 2.7216e-02, PNorm = 136.4709, GNorm = 0.3183, lr_0 = 6.5335e-04
Loss = 2.9559e-02, PNorm = 136.5226, GNorm = 0.4049, lr_0 = 6.5291e-04
Loss = 2.6120e-02, PNorm = 136.5722, GNorm = 0.2757, lr_0 = 6.5246e-04
Loss = 3.0026e-02, PNorm = 136.6251, GNorm = 0.2957, lr_0 = 6.5201e-04
Loss = 3.3812e-02, PNorm = 136.6817, GNorm = 0.3814, lr_0 = 6.5157e-04
Loss = 2.5832e-02, PNorm = 136.7305, GNorm = 0.2686, lr_0 = 6.5112e-04
Loss = 2.3826e-02, PNorm = 136.7758, GNorm = 0.2394, lr_0 = 6.5067e-04
Loss = 2.2817e-02, PNorm = 136.8148, GNorm = 0.2190, lr_0 = 6.5023e-04
Loss = 3.0669e-02, PNorm = 136.8537, GNorm = 0.5742, lr_0 = 6.4978e-04
Loss = 2.7650e-02, PNorm = 136.9074, GNorm = 0.2215, lr_0 = 6.4934e-04
Loss = 2.5013e-02, PNorm = 136.9559, GNorm = 0.5318, lr_0 = 6.4889e-04
Loss = 2.2969e-02, PNorm = 137.0063, GNorm = 0.2586, lr_0 = 6.4845e-04
Loss = 2.4325e-02, PNorm = 137.0500, GNorm = 0.2246, lr_0 = 6.4800e-04
Loss = 2.4012e-02, PNorm = 137.0877, GNorm = 0.4184, lr_0 = 6.4756e-04
Loss = 2.2658e-02, PNorm = 137.1292, GNorm = 0.4082, lr_0 = 6.4712e-04
Loss = 2.2872e-02, PNorm = 137.1744, GNorm = 0.8463, lr_0 = 6.4667e-04
Loss = 2.2734e-02, PNorm = 137.2230, GNorm = 0.5322, lr_0 = 6.4623e-04
Loss = 2.5718e-02, PNorm = 137.2681, GNorm = 0.2652, lr_0 = 6.4579e-04
Loss = 2.5196e-02, PNorm = 137.3159, GNorm = 0.2097, lr_0 = 6.4534e-04
Loss = 2.4547e-02, PNorm = 137.3661, GNorm = 0.4631, lr_0 = 6.4490e-04
Loss = 2.2641e-02, PNorm = 137.4191, GNorm = 0.2238, lr_0 = 6.4446e-04
Loss = 3.1547e-02, PNorm = 137.4692, GNorm = 0.2858, lr_0 = 6.4402e-04
Loss = 2.8422e-02, PNorm = 137.5224, GNorm = 0.6285, lr_0 = 6.4358e-04
Loss = 2.4873e-02, PNorm = 137.5732, GNorm = 0.1786, lr_0 = 6.4314e-04
Loss = 2.3397e-02, PNorm = 137.6301, GNorm = 0.2364, lr_0 = 6.4270e-04
Loss = 2.6305e-02, PNorm = 137.6820, GNorm = 0.3593, lr_0 = 6.4226e-04
Loss = 2.5259e-02, PNorm = 137.7330, GNorm = 0.4063, lr_0 = 6.4182e-04
Loss = 2.8966e-02, PNorm = 137.7716, GNorm = 0.5997, lr_0 = 6.4138e-04
Loss = 2.6057e-02, PNorm = 137.8130, GNorm = 0.7690, lr_0 = 6.4094e-04
Loss = 2.6937e-02, PNorm = 137.8599, GNorm = 0.5503, lr_0 = 6.4050e-04
Loss = 2.7301e-02, PNorm = 137.9129, GNorm = 0.3735, lr_0 = 6.4006e-04
Loss = 2.4966e-02, PNorm = 137.9626, GNorm = 0.5633, lr_0 = 6.3962e-04
Loss = 2.3763e-02, PNorm = 138.0096, GNorm = 0.2212, lr_0 = 6.3918e-04
Loss = 2.7882e-02, PNorm = 138.0640, GNorm = 0.6753, lr_0 = 6.3874e-04
Loss = 2.4574e-02, PNorm = 138.1134, GNorm = 0.4477, lr_0 = 6.3831e-04
Loss = 2.8743e-02, PNorm = 138.1660, GNorm = 0.5993, lr_0 = 6.3787e-04
Loss = 2.4932e-02, PNorm = 138.2225, GNorm = 0.4224, lr_0 = 6.3743e-04
Loss = 2.6374e-02, PNorm = 138.2675, GNorm = 0.4443, lr_0 = 6.3700e-04
Loss = 2.4670e-02, PNorm = 138.3065, GNorm = 0.3020, lr_0 = 6.3656e-04
Loss = 2.6275e-02, PNorm = 138.3496, GNorm = 0.4027, lr_0 = 6.3612e-04
Loss = 2.9170e-02, PNorm = 138.4035, GNorm = 0.7918, lr_0 = 6.3569e-04
Loss = 2.7613e-02, PNorm = 138.4624, GNorm = 0.3264, lr_0 = 6.3525e-04
Loss = 3.2083e-02, PNorm = 138.5195, GNorm = 0.3822, lr_0 = 6.3482e-04
Loss = 2.4802e-02, PNorm = 138.5820, GNorm = 0.2924, lr_0 = 6.3438e-04
Loss = 2.4937e-02, PNorm = 138.6363, GNorm = 0.4243, lr_0 = 6.3395e-04
Loss = 2.6597e-02, PNorm = 138.6857, GNorm = 0.3976, lr_0 = 6.3351e-04
Loss = 3.0741e-02, PNorm = 138.7450, GNorm = 0.6428, lr_0 = 6.3308e-04
Loss = 2.6278e-02, PNorm = 138.7958, GNorm = 0.3130, lr_0 = 6.3265e-04
Loss = 2.5103e-02, PNorm = 138.8485, GNorm = 0.2471, lr_0 = 6.3221e-04
Loss = 2.7279e-02, PNorm = 138.8982, GNorm = 0.5795, lr_0 = 6.3178e-04
Loss = 2.8583e-02, PNorm = 138.9510, GNorm = 0.1801, lr_0 = 6.3135e-04
Loss = 3.1354e-02, PNorm = 139.0045, GNorm = 0.2924, lr_0 = 6.3091e-04
Loss = 2.6608e-02, PNorm = 139.0592, GNorm = 0.4710, lr_0 = 6.3048e-04
Loss = 2.8556e-02, PNorm = 139.1138, GNorm = 0.2247, lr_0 = 6.3005e-04
Loss = 2.3145e-02, PNorm = 139.1689, GNorm = 0.3695, lr_0 = 6.2962e-04
Loss = 2.6982e-02, PNorm = 139.2160, GNorm = 0.1837, lr_0 = 6.2919e-04
Loss = 2.8062e-02, PNorm = 139.2711, GNorm = 0.4423, lr_0 = 6.2876e-04
Loss = 3.1462e-02, PNorm = 139.3283, GNorm = 0.4927, lr_0 = 6.2833e-04
Loss = 2.6180e-02, PNorm = 139.3867, GNorm = 0.3557, lr_0 = 6.2789e-04
Loss = 2.5102e-02, PNorm = 139.4424, GNorm = 0.3712, lr_0 = 6.2746e-04
Loss = 2.8124e-02, PNorm = 139.4947, GNorm = 0.3450, lr_0 = 6.2703e-04
Loss = 2.9456e-02, PNorm = 139.5481, GNorm = 0.4812, lr_0 = 6.2661e-04
Loss = 2.5181e-02, PNorm = 139.6015, GNorm = 0.2869, lr_0 = 6.2618e-04
Loss = 2.8364e-02, PNorm = 139.6578, GNorm = 0.4028, lr_0 = 6.2575e-04
Loss = 2.5423e-02, PNorm = 139.7198, GNorm = 0.2415, lr_0 = 6.2532e-04
Loss = 2.7664e-02, PNorm = 139.7707, GNorm = 0.5730, lr_0 = 6.2489e-04
Loss = 2.5519e-02, PNorm = 139.8254, GNorm = 0.1802, lr_0 = 6.2446e-04
Loss = 2.9420e-02, PNorm = 139.8813, GNorm = 0.4123, lr_0 = 6.2403e-04
Loss = 2.4624e-02, PNorm = 139.9352, GNorm = 0.2112, lr_0 = 6.2361e-04
Loss = 2.4107e-02, PNorm = 139.9859, GNorm = 0.2480, lr_0 = 6.2318e-04
Loss = 2.4158e-02, PNorm = 140.0361, GNorm = 0.5244, lr_0 = 6.2275e-04
Loss = 2.5461e-02, PNorm = 140.0954, GNorm = 0.1657, lr_0 = 6.2233e-04
Loss = 2.4519e-02, PNorm = 140.1475, GNorm = 0.5817, lr_0 = 6.2190e-04
Loss = 2.3051e-02, PNorm = 140.2032, GNorm = 0.6754, lr_0 = 6.2147e-04
Loss = 2.7165e-02, PNorm = 140.2552, GNorm = 0.2263, lr_0 = 6.2105e-04
Loss = 2.7058e-02, PNorm = 140.3142, GNorm = 1.1698, lr_0 = 6.2062e-04
Loss = 2.1792e-02, PNorm = 140.3684, GNorm = 0.5329, lr_0 = 6.2020e-04
Loss = 3.1672e-02, PNorm = 140.4274, GNorm = 0.3579, lr_0 = 6.1977e-04
Loss = 2.8295e-02, PNorm = 140.4879, GNorm = 0.4384, lr_0 = 6.1935e-04
Loss = 2.6231e-02, PNorm = 140.5515, GNorm = 0.2675, lr_0 = 6.1892e-04
Loss = 2.9139e-02, PNorm = 140.6098, GNorm = 0.4282, lr_0 = 6.1850e-04
Loss = 2.6252e-02, PNorm = 140.6732, GNorm = 0.5390, lr_0 = 6.1808e-04
Loss = 2.3274e-02, PNorm = 140.7391, GNorm = 0.4115, lr_0 = 6.1765e-04
Loss = 2.5768e-02, PNorm = 140.7991, GNorm = 0.2627, lr_0 = 6.1723e-04
Loss = 2.5248e-02, PNorm = 140.8501, GNorm = 0.3244, lr_0 = 6.1681e-04
Loss = 2.7002e-02, PNorm = 140.9080, GNorm = 0.2202, lr_0 = 6.1638e-04
Loss = 2.3989e-02, PNorm = 140.9582, GNorm = 0.4402, lr_0 = 6.1596e-04
Loss = 2.7463e-02, PNorm = 141.0083, GNorm = 0.4904, lr_0 = 6.1554e-04
Loss = 2.8041e-02, PNorm = 141.0620, GNorm = 0.6751, lr_0 = 6.1512e-04
Loss = 3.0775e-02, PNorm = 141.1203, GNorm = 0.3189, lr_0 = 6.1470e-04
Loss = 2.8336e-02, PNorm = 141.1759, GNorm = 0.4381, lr_0 = 6.1428e-04
Loss = 2.8886e-02, PNorm = 141.2387, GNorm = 0.4015, lr_0 = 6.1385e-04
Loss = 2.5805e-02, PNorm = 141.3006, GNorm = 0.6228, lr_0 = 6.1343e-04
Loss = 2.9371e-02, PNorm = 141.3604, GNorm = 0.5986, lr_0 = 6.1301e-04
Loss = 2.3543e-02, PNorm = 141.4064, GNorm = 0.5759, lr_0 = 6.1259e-04
Loss = 3.1180e-02, PNorm = 141.4616, GNorm = 0.3948, lr_0 = 6.1217e-04
Loss = 3.3605e-02, PNorm = 141.5234, GNorm = 0.3528, lr_0 = 6.1175e-04
Loss = 3.5680e-02, PNorm = 141.5851, GNorm = 1.1123, lr_0 = 6.1134e-04
Loss = 2.7700e-02, PNorm = 141.6447, GNorm = 0.3152, lr_0 = 6.1092e-04
Loss = 2.4540e-02, PNorm = 141.7035, GNorm = 0.3979, lr_0 = 6.1050e-04
Validation mae = 0.284224
Epoch 8
Loss = 2.3626e-02, PNorm = 141.7474, GNorm = 0.2367, lr_0 = 6.1008e-04
Loss = 2.7212e-02, PNorm = 141.7946, GNorm = 0.4992, lr_0 = 6.0966e-04
Loss = 2.5320e-02, PNorm = 141.8329, GNorm = 0.2679, lr_0 = 6.0924e-04
Loss = 2.0076e-02, PNorm = 141.8746, GNorm = 0.1822, lr_0 = 6.0883e-04
Loss = 2.0904e-02, PNorm = 141.9106, GNorm = 0.6852, lr_0 = 6.0841e-04
Loss = 2.3573e-02, PNorm = 141.9415, GNorm = 0.6950, lr_0 = 6.0799e-04
Loss = 2.2330e-02, PNorm = 141.9809, GNorm = 0.2641, lr_0 = 6.0758e-04
Loss = 2.1497e-02, PNorm = 142.0176, GNorm = 0.2565, lr_0 = 6.0716e-04
Loss = 2.3993e-02, PNorm = 142.0589, GNorm = 0.7774, lr_0 = 6.0674e-04
Loss = 2.4318e-02, PNorm = 142.0967, GNorm = 0.2471, lr_0 = 6.0633e-04
Loss = 2.0141e-02, PNorm = 142.1334, GNorm = 0.3608, lr_0 = 6.0591e-04
Loss = 2.2429e-02, PNorm = 142.1701, GNorm = 0.4813, lr_0 = 6.0550e-04
Loss = 2.1441e-02, PNorm = 142.2086, GNorm = 0.4494, lr_0 = 6.0508e-04
Loss = 2.6925e-02, PNorm = 142.2504, GNorm = 0.6523, lr_0 = 6.0467e-04
Loss = 1.9126e-02, PNorm = 142.2879, GNorm = 0.2696, lr_0 = 6.0425e-04
Loss = 2.4184e-02, PNorm = 142.3236, GNorm = 0.3229, lr_0 = 6.0384e-04
Loss = 2.2211e-02, PNorm = 142.3599, GNorm = 1.1473, lr_0 = 6.0343e-04
Loss = 1.9629e-02, PNorm = 142.3999, GNorm = 0.7187, lr_0 = 6.0301e-04
Loss = 1.9907e-02, PNorm = 142.4379, GNorm = 0.6056, lr_0 = 6.0260e-04
Loss = 2.3110e-02, PNorm = 142.4736, GNorm = 1.4749, lr_0 = 6.0219e-04
Loss = 1.9729e-02, PNorm = 142.5109, GNorm = 0.2633, lr_0 = 6.0178e-04
Loss = 2.0107e-02, PNorm = 142.5537, GNorm = 0.7608, lr_0 = 6.0136e-04
Loss = 1.9853e-02, PNorm = 142.5900, GNorm = 0.4474, lr_0 = 6.0095e-04
Loss = 1.9716e-02, PNorm = 142.6282, GNorm = 0.3869, lr_0 = 6.0054e-04
Loss = 2.0579e-02, PNorm = 142.6644, GNorm = 0.6912, lr_0 = 6.0013e-04
Loss = 2.4807e-02, PNorm = 142.7055, GNorm = 0.2489, lr_0 = 5.9972e-04
Loss = 1.9222e-02, PNorm = 142.7485, GNorm = 0.2971, lr_0 = 5.9931e-04
Loss = 1.7318e-02, PNorm = 142.7887, GNorm = 0.3252, lr_0 = 5.9890e-04
Loss = 1.8766e-02, PNorm = 142.8233, GNorm = 0.3163, lr_0 = 5.9849e-04
Loss = 1.7332e-02, PNorm = 142.8598, GNorm = 0.1635, lr_0 = 5.9808e-04
Loss = 1.8764e-02, PNorm = 142.8972, GNorm = 0.2590, lr_0 = 5.9767e-04
Loss = 2.2242e-02, PNorm = 142.9375, GNorm = 0.3767, lr_0 = 5.9726e-04
Loss = 2.2911e-02, PNorm = 142.9775, GNorm = 0.5225, lr_0 = 5.9685e-04
Loss = 1.9089e-02, PNorm = 143.0229, GNorm = 0.5715, lr_0 = 5.9644e-04
Loss = 2.3217e-02, PNorm = 143.0660, GNorm = 0.3162, lr_0 = 5.9603e-04
Loss = 1.7619e-02, PNorm = 143.1097, GNorm = 0.4032, lr_0 = 5.9562e-04
Loss = 2.0936e-02, PNorm = 143.1465, GNorm = 0.4508, lr_0 = 5.9521e-04
Loss = 2.0656e-02, PNorm = 143.1924, GNorm = 0.6251, lr_0 = 5.9481e-04
Loss = 1.9663e-02, PNorm = 143.2313, GNorm = 0.4247, lr_0 = 5.9440e-04
Loss = 2.0380e-02, PNorm = 143.2802, GNorm = 0.4446, lr_0 = 5.9399e-04
Loss = 2.5339e-02, PNorm = 143.3273, GNorm = 0.4682, lr_0 = 5.9358e-04
Loss = 2.3681e-02, PNorm = 143.3719, GNorm = 0.2508, lr_0 = 5.9318e-04
Loss = 1.8463e-02, PNorm = 143.4139, GNorm = 0.5694, lr_0 = 5.9277e-04
Loss = 2.2503e-02, PNorm = 143.4632, GNorm = 0.2636, lr_0 = 5.9236e-04
Loss = 2.0259e-02, PNorm = 143.5109, GNorm = 0.3563, lr_0 = 5.9196e-04
Loss = 2.5585e-02, PNorm = 143.5497, GNorm = 0.3943, lr_0 = 5.9155e-04
Loss = 1.8014e-02, PNorm = 143.5866, GNorm = 0.6125, lr_0 = 5.9115e-04
Loss = 2.2478e-02, PNorm = 143.6253, GNorm = 0.3335, lr_0 = 5.9074e-04
Loss = 1.8802e-02, PNorm = 143.6679, GNorm = 0.2121, lr_0 = 5.9034e-04
Loss = 2.5996e-02, PNorm = 143.7082, GNorm = 1.1382, lr_0 = 5.8993e-04
Loss = 2.1688e-02, PNorm = 143.7514, GNorm = 0.2646, lr_0 = 5.8953e-04
Loss = 2.6278e-02, PNorm = 143.7989, GNorm = 0.6886, lr_0 = 5.8913e-04
Loss = 2.1675e-02, PNorm = 143.8428, GNorm = 0.1975, lr_0 = 5.8872e-04
Loss = 2.4521e-02, PNorm = 143.8890, GNorm = 0.3137, lr_0 = 5.8832e-04
Loss = 2.1717e-02, PNorm = 143.9363, GNorm = 0.3409, lr_0 = 5.8792e-04
Loss = 1.9827e-02, PNorm = 143.9797, GNorm = 0.3714, lr_0 = 5.8751e-04
Loss = 2.0195e-02, PNorm = 144.0203, GNorm = 0.3336, lr_0 = 5.8711e-04
Loss = 2.3691e-02, PNorm = 144.0666, GNorm = 0.5330, lr_0 = 5.8671e-04
Loss = 2.2173e-02, PNorm = 144.1127, GNorm = 0.4341, lr_0 = 5.8631e-04
Loss = 2.1664e-02, PNorm = 144.1643, GNorm = 0.6895, lr_0 = 5.8591e-04
Loss = 2.2712e-02, PNorm = 144.2107, GNorm = 0.4636, lr_0 = 5.8550e-04
Loss = 2.2303e-02, PNorm = 144.2572, GNorm = 0.4771, lr_0 = 5.8510e-04
Loss = 2.3270e-02, PNorm = 144.3040, GNorm = 0.3582, lr_0 = 5.8470e-04
Loss = 2.2457e-02, PNorm = 144.3490, GNorm = 0.2556, lr_0 = 5.8430e-04
Loss = 2.4287e-02, PNorm = 144.3937, GNorm = 0.3180, lr_0 = 5.8390e-04
Loss = 2.0195e-02, PNorm = 144.4392, GNorm = 0.4580, lr_0 = 5.8350e-04
Loss = 2.3208e-02, PNorm = 144.4815, GNorm = 0.5305, lr_0 = 5.8310e-04
Loss = 1.8965e-02, PNorm = 144.5283, GNorm = 0.2934, lr_0 = 5.8270e-04
Loss = 2.4160e-02, PNorm = 144.5765, GNorm = 0.2270, lr_0 = 5.8230e-04
Loss = 1.9647e-02, PNorm = 144.6267, GNorm = 0.3359, lr_0 = 5.8190e-04
Loss = 2.4895e-02, PNorm = 144.6698, GNorm = 1.1676, lr_0 = 5.8151e-04
Loss = 2.2313e-02, PNorm = 144.7119, GNorm = 0.3883, lr_0 = 5.8111e-04
Loss = 2.0082e-02, PNorm = 144.7624, GNorm = 0.2923, lr_0 = 5.8071e-04
Loss = 2.1595e-02, PNorm = 144.8118, GNorm = 0.5196, lr_0 = 5.8031e-04
Loss = 1.9324e-02, PNorm = 144.8586, GNorm = 0.3006, lr_0 = 5.7991e-04
Loss = 2.3929e-02, PNorm = 144.9046, GNorm = 0.2210, lr_0 = 5.7952e-04
Loss = 2.0155e-02, PNorm = 144.9535, GNorm = 0.3746, lr_0 = 5.7912e-04
Loss = 1.7866e-02, PNorm = 145.0017, GNorm = 0.2910, lr_0 = 5.7872e-04
Loss = 2.3872e-02, PNorm = 145.0457, GNorm = 0.1276, lr_0 = 5.7833e-04
Loss = 2.4558e-02, PNorm = 145.0894, GNorm = 0.7319, lr_0 = 5.7793e-04
Loss = 2.3704e-02, PNorm = 145.1317, GNorm = 0.6746, lr_0 = 5.7753e-04
Loss = 2.2597e-02, PNorm = 145.1786, GNorm = 0.6875, lr_0 = 5.7714e-04
Loss = 1.7260e-02, PNorm = 145.2259, GNorm = 0.4015, lr_0 = 5.7674e-04
Loss = 2.2708e-02, PNorm = 145.2692, GNorm = 0.2005, lr_0 = 5.7635e-04
Loss = 2.1648e-02, PNorm = 145.3141, GNorm = 0.5623, lr_0 = 5.7595e-04
Loss = 2.3195e-02, PNorm = 145.3644, GNorm = 0.5483, lr_0 = 5.7556e-04
Loss = 2.0253e-02, PNorm = 145.4170, GNorm = 0.5667, lr_0 = 5.7516e-04
Loss = 1.9601e-02, PNorm = 145.4636, GNorm = 0.2864, lr_0 = 5.7477e-04
Loss = 2.5182e-02, PNorm = 145.5147, GNorm = 0.3501, lr_0 = 5.7438e-04
Loss = 2.7437e-02, PNorm = 145.5717, GNorm = 1.3328, lr_0 = 5.7398e-04
Loss = 1.9002e-02, PNorm = 145.6224, GNorm = 0.2761, lr_0 = 5.7359e-04
Loss = 1.9559e-02, PNorm = 145.6708, GNorm = 0.4167, lr_0 = 5.7320e-04
Loss = 1.8924e-02, PNorm = 145.7161, GNorm = 0.2970, lr_0 = 5.7280e-04
Loss = 2.7563e-02, PNorm = 145.7627, GNorm = 0.4384, lr_0 = 5.7241e-04
Loss = 2.1033e-02, PNorm = 145.8188, GNorm = 0.5403, lr_0 = 5.7202e-04
Loss = 2.4013e-02, PNorm = 145.8727, GNorm = 0.6940, lr_0 = 5.7163e-04
Loss = 2.3086e-02, PNorm = 145.9274, GNorm = 0.2875, lr_0 = 5.7124e-04
Loss = 1.9024e-02, PNorm = 145.9764, GNorm = 0.3352, lr_0 = 5.7084e-04
Loss = 2.2090e-02, PNorm = 146.0162, GNorm = 0.5120, lr_0 = 5.7045e-04
Loss = 2.0090e-02, PNorm = 146.0638, GNorm = 0.9332, lr_0 = 5.7006e-04
Loss = 2.3326e-02, PNorm = 146.1075, GNorm = 0.2621, lr_0 = 5.6967e-04
Loss = 2.0353e-02, PNorm = 146.1505, GNorm = 0.3792, lr_0 = 5.6928e-04
Loss = 1.8846e-02, PNorm = 146.1912, GNorm = 0.1855, lr_0 = 5.6889e-04
Loss = 2.4049e-02, PNorm = 146.2326, GNorm = 0.3582, lr_0 = 5.6850e-04
Loss = 2.3964e-02, PNorm = 146.2829, GNorm = 0.3604, lr_0 = 5.6811e-04
Loss = 2.5213e-02, PNorm = 146.3255, GNorm = 0.7610, lr_0 = 5.6772e-04
Loss = 2.6002e-02, PNorm = 146.3745, GNorm = 0.5924, lr_0 = 5.6733e-04
Loss = 2.1470e-02, PNorm = 146.4274, GNorm = 0.3321, lr_0 = 5.6695e-04
Loss = 1.9929e-02, PNorm = 146.4853, GNorm = 0.3989, lr_0 = 5.6656e-04
Loss = 2.2211e-02, PNorm = 146.5398, GNorm = 0.2867, lr_0 = 5.6617e-04
Loss = 2.1444e-02, PNorm = 146.5879, GNorm = 0.1864, lr_0 = 5.6578e-04
Loss = 1.8740e-02, PNorm = 146.6323, GNorm = 0.3212, lr_0 = 5.6539e-04
Loss = 2.2269e-02, PNorm = 146.6735, GNorm = 0.4314, lr_0 = 5.6501e-04
Loss = 2.9111e-02, PNorm = 146.7168, GNorm = 0.6510, lr_0 = 5.6462e-04
Loss = 1.7564e-02, PNorm = 146.7647, GNorm = 0.4316, lr_0 = 5.6423e-04
Loss = 2.1160e-02, PNorm = 146.8076, GNorm = 0.5621, lr_0 = 5.6385e-04
Loss = 1.8608e-02, PNorm = 146.8463, GNorm = 0.1901, lr_0 = 5.6346e-04
Loss = 2.0725e-02, PNorm = 146.8898, GNorm = 0.1973, lr_0 = 5.6307e-04
Loss = 2.1692e-02, PNorm = 146.9335, GNorm = 0.6045, lr_0 = 5.6269e-04
Loss = 2.2384e-02, PNorm = 146.9761, GNorm = 0.1591, lr_0 = 5.6230e-04
Validation mae = 0.283490
Epoch 9
Loss = 1.8823e-02, PNorm = 147.0136, GNorm = 0.6586, lr_0 = 5.6192e-04
Loss = 1.7127e-02, PNorm = 147.0407, GNorm = 0.4813, lr_0 = 5.6153e-04
Loss = 1.8345e-02, PNorm = 147.0721, GNorm = 0.2565, lr_0 = 5.6115e-04
Loss = 1.7375e-02, PNorm = 147.1021, GNorm = 0.3699, lr_0 = 5.6076e-04
Loss = 1.9296e-02, PNorm = 147.1335, GNorm = 0.3132, lr_0 = 5.6038e-04
Loss = 1.8755e-02, PNorm = 147.1638, GNorm = 0.2082, lr_0 = 5.6000e-04
Loss = 1.4589e-02, PNorm = 147.1959, GNorm = 0.2194, lr_0 = 5.5961e-04
Loss = 1.7833e-02, PNorm = 147.2282, GNorm = 0.2973, lr_0 = 5.5923e-04
Loss = 2.0520e-02, PNorm = 147.2619, GNorm = 0.8464, lr_0 = 5.5885e-04
Loss = 1.7873e-02, PNorm = 147.2970, GNorm = 0.2748, lr_0 = 5.5846e-04
Loss = 2.0514e-02, PNorm = 147.3332, GNorm = 0.4302, lr_0 = 5.5808e-04
Loss = 1.7716e-02, PNorm = 147.3663, GNorm = 0.4380, lr_0 = 5.5770e-04
Loss = 2.0117e-02, PNorm = 147.4004, GNorm = 0.6131, lr_0 = 5.5732e-04
Loss = 1.7802e-02, PNorm = 147.4306, GNorm = 0.3908, lr_0 = 5.5693e-04
Loss = 1.6830e-02, PNorm = 147.4658, GNorm = 0.1792, lr_0 = 5.5655e-04
Loss = 1.6928e-02, PNorm = 147.5011, GNorm = 0.1833, lr_0 = 5.5617e-04
Loss = 1.7702e-02, PNorm = 147.5359, GNorm = 0.2917, lr_0 = 5.5579e-04
Loss = 1.7524e-02, PNorm = 147.5663, GNorm = 0.3292, lr_0 = 5.5541e-04
Loss = 1.5219e-02, PNorm = 147.5971, GNorm = 0.2495, lr_0 = 5.5503e-04
Loss = 1.6199e-02, PNorm = 147.6285, GNorm = 0.3413, lr_0 = 5.5465e-04
Loss = 1.9171e-02, PNorm = 147.6631, GNorm = 0.2303, lr_0 = 5.5427e-04
Loss = 1.9285e-02, PNorm = 147.6989, GNorm = 0.3948, lr_0 = 5.5389e-04
Loss = 1.5437e-02, PNorm = 147.7350, GNorm = 0.4999, lr_0 = 5.5351e-04
Loss = 1.6993e-02, PNorm = 147.7732, GNorm = 0.3466, lr_0 = 5.5313e-04
Loss = 1.8295e-02, PNorm = 147.8085, GNorm = 0.6336, lr_0 = 5.5275e-04
Loss = 1.8018e-02, PNorm = 147.8495, GNorm = 0.2794, lr_0 = 5.5237e-04
Loss = 1.8300e-02, PNorm = 147.8828, GNorm = 0.2812, lr_0 = 5.5199e-04
Loss = 1.4997e-02, PNorm = 147.9179, GNorm = 0.2484, lr_0 = 5.5162e-04
Loss = 2.3036e-02, PNorm = 147.9582, GNorm = 0.3689, lr_0 = 5.5124e-04
Loss = 1.6645e-02, PNorm = 147.9963, GNorm = 0.3962, lr_0 = 5.5086e-04
Loss = 1.5625e-02, PNorm = 148.0347, GNorm = 0.3301, lr_0 = 5.5048e-04
Loss = 1.6078e-02, PNorm = 148.0629, GNorm = 0.3845, lr_0 = 5.5011e-04
Loss = 1.7155e-02, PNorm = 148.0905, GNorm = 0.9030, lr_0 = 5.4973e-04
Loss = 1.8058e-02, PNorm = 148.1223, GNorm = 0.1759, lr_0 = 5.4935e-04
Loss = 1.6489e-02, PNorm = 148.1533, GNorm = 0.2085, lr_0 = 5.4898e-04
Loss = 1.5864e-02, PNorm = 148.1930, GNorm = 0.5742, lr_0 = 5.4860e-04
Loss = 1.6501e-02, PNorm = 148.2294, GNorm = 0.3443, lr_0 = 5.4822e-04
Loss = 1.9026e-02, PNorm = 148.2639, GNorm = 0.6459, lr_0 = 5.4785e-04
Loss = 2.0206e-02, PNorm = 148.3022, GNorm = 0.2131, lr_0 = 5.4747e-04
Loss = 1.9435e-02, PNorm = 148.3411, GNorm = 0.5746, lr_0 = 5.4710e-04
Loss = 1.4179e-02, PNorm = 148.3832, GNorm = 0.3259, lr_0 = 5.4672e-04
Loss = 1.7443e-02, PNorm = 148.4210, GNorm = 0.1690, lr_0 = 5.4635e-04
Loss = 1.7088e-02, PNorm = 148.4600, GNorm = 0.4707, lr_0 = 5.4597e-04
Loss = 1.4208e-02, PNorm = 148.4939, GNorm = 0.3794, lr_0 = 5.4560e-04
Loss = 1.7076e-02, PNorm = 148.5228, GNorm = 0.4596, lr_0 = 5.4523e-04
Loss = 1.5337e-02, PNorm = 148.5539, GNorm = 0.6734, lr_0 = 5.4485e-04
Loss = 1.4966e-02, PNorm = 148.5867, GNorm = 0.3898, lr_0 = 5.4448e-04
Loss = 1.9166e-02, PNorm = 148.6227, GNorm = 0.3584, lr_0 = 5.4411e-04
Loss = 1.8689e-02, PNorm = 148.6617, GNorm = 0.6356, lr_0 = 5.4373e-04
Loss = 1.8427e-02, PNorm = 148.6995, GNorm = 0.1695, lr_0 = 5.4336e-04
Loss = 1.3893e-02, PNorm = 148.7400, GNorm = 0.5978, lr_0 = 5.4299e-04
Loss = 1.6114e-02, PNorm = 148.7740, GNorm = 0.2743, lr_0 = 5.4262e-04
Loss = 1.5994e-02, PNorm = 148.8074, GNorm = 0.8078, lr_0 = 5.4225e-04
Loss = 1.5152e-02, PNorm = 148.8375, GNorm = 0.2671, lr_0 = 5.4187e-04
Loss = 1.4783e-02, PNorm = 148.8708, GNorm = 0.3787, lr_0 = 5.4150e-04
Loss = 1.7606e-02, PNorm = 148.9009, GNorm = 0.2360, lr_0 = 5.4113e-04
Loss = 1.9461e-02, PNorm = 148.9376, GNorm = 0.5239, lr_0 = 5.4076e-04
Loss = 1.8515e-02, PNorm = 148.9757, GNorm = 0.3937, lr_0 = 5.4039e-04
Loss = 1.6474e-02, PNorm = 149.0139, GNorm = 0.1895, lr_0 = 5.4002e-04
Loss = 1.9141e-02, PNorm = 149.0564, GNorm = 0.1867, lr_0 = 5.3965e-04
Loss = 1.4978e-02, PNorm = 149.0985, GNorm = 0.4323, lr_0 = 5.3928e-04
Loss = 1.5907e-02, PNorm = 149.1345, GNorm = 0.4089, lr_0 = 5.3891e-04
Loss = 1.7302e-02, PNorm = 149.1687, GNorm = 0.2752, lr_0 = 5.3854e-04
Loss = 1.8221e-02, PNorm = 149.2052, GNorm = 0.3856, lr_0 = 5.3817e-04
Loss = 1.4944e-02, PNorm = 149.2432, GNorm = 0.2617, lr_0 = 5.3781e-04
Loss = 1.6017e-02, PNorm = 149.2785, GNorm = 0.4077, lr_0 = 5.3744e-04
Loss = 1.7602e-02, PNorm = 149.3236, GNorm = 0.3310, lr_0 = 5.3707e-04
Loss = 1.7180e-02, PNorm = 149.3655, GNorm = 0.1890, lr_0 = 5.3670e-04
Loss = 1.5711e-02, PNorm = 149.4064, GNorm = 0.2984, lr_0 = 5.3633e-04
Loss = 2.0667e-02, PNorm = 149.4419, GNorm = 0.6267, lr_0 = 5.3597e-04
Loss = 1.5199e-02, PNorm = 149.4841, GNorm = 0.5813, lr_0 = 5.3560e-04
Loss = 1.5139e-02, PNorm = 149.5233, GNorm = 0.3229, lr_0 = 5.3523e-04
Loss = 2.6307e-02, PNorm = 149.5646, GNorm = 0.3624, lr_0 = 5.3486e-04
Loss = 2.1882e-02, PNorm = 149.6044, GNorm = 0.4414, lr_0 = 5.3450e-04
Loss = 1.7048e-02, PNorm = 149.6431, GNorm = 0.2371, lr_0 = 5.3413e-04
Loss = 1.9481e-02, PNorm = 149.6820, GNorm = 0.9217, lr_0 = 5.3377e-04
Loss = 1.6525e-02, PNorm = 149.7149, GNorm = 0.3521, lr_0 = 5.3340e-04
Loss = 1.8394e-02, PNorm = 149.7604, GNorm = 0.3106, lr_0 = 5.3304e-04
Loss = 1.8200e-02, PNorm = 149.8027, GNorm = 0.2919, lr_0 = 5.3267e-04
Loss = 1.7474e-02, PNorm = 149.8404, GNorm = 0.2964, lr_0 = 5.3231e-04
Loss = 1.6837e-02, PNorm = 149.8818, GNorm = 0.4247, lr_0 = 5.3194e-04
Loss = 2.1954e-02, PNorm = 149.9223, GNorm = 0.3399, lr_0 = 5.3158e-04
Loss = 1.7623e-02, PNorm = 149.9715, GNorm = 0.4884, lr_0 = 5.3121e-04
Loss = 1.6301e-02, PNorm = 150.0126, GNorm = 0.3874, lr_0 = 5.3085e-04
Loss = 1.8215e-02, PNorm = 150.0499, GNorm = 0.3951, lr_0 = 5.3048e-04
Loss = 2.2470e-02, PNorm = 150.0911, GNorm = 0.3117, lr_0 = 5.3012e-04
Loss = 1.7724e-02, PNorm = 150.1319, GNorm = 0.3845, lr_0 = 5.2976e-04
Loss = 2.0605e-02, PNorm = 150.1780, GNorm = 0.2679, lr_0 = 5.2939e-04
Loss = 1.8140e-02, PNorm = 150.2237, GNorm = 0.1616, lr_0 = 5.2903e-04
Loss = 2.0788e-02, PNorm = 150.2664, GNorm = 0.3191, lr_0 = 5.2867e-04
Loss = 1.6544e-02, PNorm = 150.3136, GNorm = 0.6561, lr_0 = 5.2831e-04
Loss = 1.3846e-02, PNorm = 150.3569, GNorm = 0.5809, lr_0 = 5.2795e-04
Loss = 1.7183e-02, PNorm = 150.3962, GNorm = 0.2938, lr_0 = 5.2758e-04
Loss = 2.0838e-02, PNorm = 150.4370, GNorm = 0.5748, lr_0 = 5.2722e-04
Loss = 2.0436e-02, PNorm = 150.4785, GNorm = 0.8914, lr_0 = 5.2686e-04
Loss = 1.5491e-02, PNorm = 150.5221, GNorm = 0.3769, lr_0 = 5.2650e-04
Loss = 2.0589e-02, PNorm = 150.5661, GNorm = 0.3384, lr_0 = 5.2614e-04
Loss = 1.6177e-02, PNorm = 150.6086, GNorm = 0.2122, lr_0 = 5.2578e-04
Loss = 2.0812e-02, PNorm = 150.6541, GNorm = 0.2096, lr_0 = 5.2542e-04
Loss = 1.5781e-02, PNorm = 150.6980, GNorm = 0.2823, lr_0 = 5.2506e-04
Loss = 2.0080e-02, PNorm = 150.7421, GNorm = 0.3413, lr_0 = 5.2470e-04
Loss = 1.7180e-02, PNorm = 150.7817, GNorm = 0.6048, lr_0 = 5.2434e-04
Loss = 1.7374e-02, PNorm = 150.8196, GNorm = 0.5352, lr_0 = 5.2398e-04
Loss = 2.2513e-02, PNorm = 150.8601, GNorm = 0.3332, lr_0 = 5.2362e-04
Loss = 1.4746e-02, PNorm = 150.8969, GNorm = 0.2782, lr_0 = 5.2326e-04
Loss = 1.9433e-02, PNorm = 150.9338, GNorm = 0.2742, lr_0 = 5.2290e-04
Loss = 1.8465e-02, PNorm = 150.9751, GNorm = 0.5957, lr_0 = 5.2255e-04
Loss = 1.7797e-02, PNorm = 151.0185, GNorm = 0.4768, lr_0 = 5.2219e-04
Loss = 1.6627e-02, PNorm = 151.0660, GNorm = 0.3195, lr_0 = 5.2183e-04
Loss = 1.8004e-02, PNorm = 151.1061, GNorm = 0.2104, lr_0 = 5.2147e-04
Loss = 2.1447e-02, PNorm = 151.1435, GNorm = 0.3782, lr_0 = 5.2112e-04
Loss = 1.8610e-02, PNorm = 151.1835, GNorm = 0.6201, lr_0 = 5.2076e-04
Loss = 1.9507e-02, PNorm = 151.2249, GNorm = 0.4672, lr_0 = 5.2040e-04
Loss = 2.1794e-02, PNorm = 151.2695, GNorm = 0.2416, lr_0 = 5.2005e-04
Loss = 2.2962e-02, PNorm = 151.3128, GNorm = 0.5908, lr_0 = 5.1969e-04
Loss = 1.6094e-02, PNorm = 151.3570, GNorm = 0.2611, lr_0 = 5.1933e-04
Loss = 1.9425e-02, PNorm = 151.3975, GNorm = 0.9909, lr_0 = 5.1898e-04
Loss = 1.9039e-02, PNorm = 151.4397, GNorm = 0.4206, lr_0 = 5.1862e-04
Loss = 1.8027e-02, PNorm = 151.4775, GNorm = 0.2615, lr_0 = 5.1827e-04
Loss = 1.7496e-02, PNorm = 151.5258, GNorm = 0.4078, lr_0 = 5.1791e-04
Validation mae = 0.282796
Epoch 10
Loss = 1.5493e-02, PNorm = 151.5628, GNorm = 0.2514, lr_0 = 5.1756e-04
Loss = 1.7628e-02, PNorm = 151.5898, GNorm = 0.3052, lr_0 = 5.1720e-04
Loss = 1.5761e-02, PNorm = 151.6242, GNorm = 0.5205, lr_0 = 5.1685e-04
Loss = 1.7672e-02, PNorm = 151.6544, GNorm = 0.2202, lr_0 = 5.1649e-04
Loss = 1.5133e-02, PNorm = 151.6823, GNorm = 0.2159, lr_0 = 5.1614e-04
Loss = 1.4396e-02, PNorm = 151.7072, GNorm = 0.3995, lr_0 = 5.1579e-04
Loss = 1.5612e-02, PNorm = 151.7280, GNorm = 0.4631, lr_0 = 5.1543e-04
Loss = 1.2921e-02, PNorm = 151.7540, GNorm = 0.2728, lr_0 = 5.1508e-04
Loss = 1.6703e-02, PNorm = 151.7813, GNorm = 0.2058, lr_0 = 5.1473e-04
Loss = 1.5845e-02, PNorm = 151.8143, GNorm = 0.2009, lr_0 = 5.1437e-04
Loss = 1.4923e-02, PNorm = 151.8490, GNorm = 0.1970, lr_0 = 5.1402e-04
Loss = 1.2439e-02, PNorm = 151.8794, GNorm = 0.2457, lr_0 = 5.1367e-04
Loss = 1.3088e-02, PNorm = 151.9040, GNorm = 0.4232, lr_0 = 5.1332e-04
Loss = 1.5462e-02, PNorm = 151.9273, GNorm = 0.1638, lr_0 = 5.1297e-04
Loss = 1.4327e-02, PNorm = 151.9522, GNorm = 0.4160, lr_0 = 5.1262e-04
Loss = 1.7603e-02, PNorm = 151.9753, GNorm = 0.2423, lr_0 = 5.1226e-04
Loss = 1.6222e-02, PNorm = 152.0045, GNorm = 0.3913, lr_0 = 5.1191e-04
Loss = 1.3002e-02, PNorm = 152.0303, GNorm = 0.3990, lr_0 = 5.1156e-04
Loss = 1.4124e-02, PNorm = 152.0527, GNorm = 0.2178, lr_0 = 5.1121e-04
Loss = 1.6565e-02, PNorm = 152.0771, GNorm = 0.2323, lr_0 = 5.1086e-04
Loss = 1.2709e-02, PNorm = 152.0999, GNorm = 0.3366, lr_0 = 5.1051e-04
Loss = 1.3684e-02, PNorm = 152.1249, GNorm = 0.2416, lr_0 = 5.1016e-04
Loss = 1.5362e-02, PNorm = 152.1509, GNorm = 0.2914, lr_0 = 5.0981e-04
Loss = 1.5588e-02, PNorm = 152.1809, GNorm = 0.2387, lr_0 = 5.0946e-04
Loss = 1.2658e-02, PNorm = 152.2079, GNorm = 0.1686, lr_0 = 5.0911e-04
Loss = 1.6126e-02, PNorm = 152.2352, GNorm = 0.3980, lr_0 = 5.0877e-04
Loss = 1.4188e-02, PNorm = 152.2571, GNorm = 0.2841, lr_0 = 5.0842e-04
Loss = 1.4745e-02, PNorm = 152.2856, GNorm = 0.2600, lr_0 = 5.0807e-04
Loss = 1.2854e-02, PNorm = 152.3154, GNorm = 0.1767, lr_0 = 5.0772e-04
Loss = 1.2986e-02, PNorm = 152.3411, GNorm = 0.4386, lr_0 = 5.0737e-04
Loss = 1.3108e-02, PNorm = 152.3656, GNorm = 0.3751, lr_0 = 5.0703e-04
Loss = 1.7980e-02, PNorm = 152.3941, GNorm = 0.2363, lr_0 = 5.0668e-04
Loss = 1.4775e-02, PNorm = 152.4252, GNorm = 0.2036, lr_0 = 5.0633e-04
Loss = 1.4693e-02, PNorm = 152.4516, GNorm = 0.3803, lr_0 = 5.0598e-04
Loss = 1.4906e-02, PNorm = 152.4824, GNorm = 0.4788, lr_0 = 5.0564e-04
Loss = 1.4304e-02, PNorm = 152.5070, GNorm = 0.5819, lr_0 = 5.0529e-04
Loss = 1.5212e-02, PNorm = 152.5430, GNorm = 0.3755, lr_0 = 5.0494e-04
Loss = 1.3399e-02, PNorm = 152.5721, GNorm = 0.2492, lr_0 = 5.0460e-04
Loss = 1.3764e-02, PNorm = 152.5985, GNorm = 0.3250, lr_0 = 5.0425e-04
Loss = 1.2279e-02, PNorm = 152.6289, GNorm = 0.3157, lr_0 = 5.0391e-04
Loss = 1.6235e-02, PNorm = 152.6521, GNorm = 0.2244, lr_0 = 5.0356e-04
Loss = 1.4931e-02, PNorm = 152.6832, GNorm = 0.4367, lr_0 = 5.0322e-04
Loss = 1.3435e-02, PNorm = 152.7138, GNorm = 0.2721, lr_0 = 5.0287e-04
Loss = 1.5605e-02, PNorm = 152.7496, GNorm = 0.4727, lr_0 = 5.0253e-04
Loss = 1.4049e-02, PNorm = 152.7802, GNorm = 0.2073, lr_0 = 5.0218e-04
Loss = 1.5009e-02, PNorm = 152.8147, GNorm = 0.5993, lr_0 = 5.0184e-04
Loss = 1.4728e-02, PNorm = 152.8431, GNorm = 0.1606, lr_0 = 5.0150e-04
Loss = 1.2493e-02, PNorm = 152.8749, GNorm = 0.1910, lr_0 = 5.0115e-04
Loss = 1.3269e-02, PNorm = 152.8996, GNorm = 0.4786, lr_0 = 5.0081e-04
Loss = 1.3644e-02, PNorm = 152.9327, GNorm = 0.4942, lr_0 = 5.0047e-04
Loss = 1.4971e-02, PNorm = 152.9617, GNorm = 0.2570, lr_0 = 5.0012e-04
Loss = 1.4655e-02, PNorm = 152.9937, GNorm = 0.2669, lr_0 = 4.9978e-04
Loss = 1.6063e-02, PNorm = 153.0205, GNorm = 0.4208, lr_0 = 4.9944e-04
Loss = 1.1813e-02, PNorm = 153.0465, GNorm = 0.2883, lr_0 = 4.9910e-04
Loss = 1.4263e-02, PNorm = 153.0714, GNorm = 0.3196, lr_0 = 4.9875e-04
Loss = 1.3911e-02, PNorm = 153.1025, GNorm = 0.1238, lr_0 = 4.9841e-04
Loss = 1.3274e-02, PNorm = 153.1315, GNorm = 0.2825, lr_0 = 4.9807e-04
Loss = 1.5256e-02, PNorm = 153.1621, GNorm = 0.1785, lr_0 = 4.9773e-04
Loss = 1.4927e-02, PNorm = 153.1903, GNorm = 0.2566, lr_0 = 4.9739e-04
Loss = 1.1328e-02, PNorm = 153.2218, GNorm = 0.3101, lr_0 = 4.9705e-04
Loss = 1.3492e-02, PNorm = 153.2526, GNorm = 0.1209, lr_0 = 4.9671e-04
Loss = 1.2186e-02, PNorm = 153.2832, GNorm = 0.2668, lr_0 = 4.9637e-04
Loss = 1.1619e-02, PNorm = 153.3099, GNorm = 0.3656, lr_0 = 4.9603e-04
Loss = 1.2159e-02, PNorm = 153.3356, GNorm = 0.2092, lr_0 = 4.9569e-04
Loss = 1.3058e-02, PNorm = 153.3668, GNorm = 0.1541, lr_0 = 4.9535e-04
Loss = 1.5105e-02, PNorm = 153.3992, GNorm = 0.8417, lr_0 = 4.9501e-04
Loss = 1.4233e-02, PNorm = 153.4287, GNorm = 0.1311, lr_0 = 4.9467e-04
Loss = 1.3420e-02, PNorm = 153.4565, GNorm = 0.2432, lr_0 = 4.9433e-04
Loss = 1.3213e-02, PNorm = 153.4900, GNorm = 0.2062, lr_0 = 4.9399e-04
Loss = 1.7007e-02, PNorm = 153.5183, GNorm = 0.3745, lr_0 = 4.9365e-04
Loss = 1.4057e-02, PNorm = 153.5539, GNorm = 0.4144, lr_0 = 4.9332e-04
Loss = 1.2650e-02, PNorm = 153.5864, GNorm = 0.2080, lr_0 = 4.9298e-04
Loss = 1.4011e-02, PNorm = 153.6145, GNorm = 0.6793, lr_0 = 4.9264e-04
Loss = 1.8731e-02, PNorm = 153.6442, GNorm = 0.1820, lr_0 = 4.9230e-04
Loss = 1.6488e-02, PNorm = 153.6749, GNorm = 0.2604, lr_0 = 4.9197e-04
Loss = 1.5253e-02, PNorm = 153.7104, GNorm = 0.3872, lr_0 = 4.9163e-04
Loss = 1.4494e-02, PNorm = 153.7490, GNorm = 0.3200, lr_0 = 4.9129e-04
Loss = 1.5541e-02, PNorm = 153.7819, GNorm = 0.3597, lr_0 = 4.9095e-04
Loss = 1.4026e-02, PNorm = 153.8179, GNorm = 0.1576, lr_0 = 4.9062e-04
Loss = 1.4001e-02, PNorm = 153.8553, GNorm = 0.2209, lr_0 = 4.9028e-04
Loss = 1.2260e-02, PNorm = 153.8924, GNorm = 0.7110, lr_0 = 4.8995e-04
Loss = 1.3195e-02, PNorm = 153.9258, GNorm = 0.2696, lr_0 = 4.8961e-04
Loss = 1.2568e-02, PNorm = 153.9585, GNorm = 0.6416, lr_0 = 4.8928e-04
Loss = 1.5570e-02, PNorm = 153.9884, GNorm = 0.2293, lr_0 = 4.8894e-04
Loss = 1.4760e-02, PNorm = 154.0230, GNorm = 0.5371, lr_0 = 4.8861e-04
Loss = 1.2451e-02, PNorm = 154.0535, GNorm = 0.2176, lr_0 = 4.8827e-04
Loss = 1.6611e-02, PNorm = 154.0892, GNorm = 0.4165, lr_0 = 4.8794e-04
Loss = 1.6045e-02, PNorm = 154.1262, GNorm = 0.3666, lr_0 = 4.8760e-04
Loss = 1.3709e-02, PNorm = 154.1619, GNorm = 0.2402, lr_0 = 4.8727e-04
Loss = 1.3384e-02, PNorm = 154.1934, GNorm = 0.3948, lr_0 = 4.8693e-04
Loss = 1.7668e-02, PNorm = 154.2255, GNorm = 0.3435, lr_0 = 4.8660e-04
Loss = 1.3742e-02, PNorm = 154.2579, GNorm = 0.3243, lr_0 = 4.8627e-04
Loss = 1.3855e-02, PNorm = 154.2883, GNorm = 0.3203, lr_0 = 4.8593e-04
Loss = 1.4704e-02, PNorm = 154.3185, GNorm = 0.4293, lr_0 = 4.8560e-04
Loss = 1.4735e-02, PNorm = 154.3461, GNorm = 0.3166, lr_0 = 4.8527e-04
Loss = 1.6197e-02, PNorm = 154.3742, GNorm = 0.1723, lr_0 = 4.8494e-04
Loss = 1.5022e-02, PNorm = 154.4049, GNorm = 0.2976, lr_0 = 4.8460e-04
Loss = 1.5194e-02, PNorm = 154.4388, GNorm = 0.1870, lr_0 = 4.8427e-04
Loss = 1.8395e-02, PNorm = 154.4778, GNorm = 0.2702, lr_0 = 4.8394e-04
Loss = 1.2543e-02, PNorm = 154.5128, GNorm = 0.2051, lr_0 = 4.8361e-04
Loss = 1.4014e-02, PNorm = 154.5446, GNorm = 0.2793, lr_0 = 4.8328e-04
Loss = 1.5414e-02, PNorm = 154.5727, GNorm = 0.3321, lr_0 = 4.8295e-04
Loss = 1.5686e-02, PNorm = 154.5991, GNorm = 0.3057, lr_0 = 4.8262e-04
Loss = 1.7086e-02, PNorm = 154.6335, GNorm = 0.6383, lr_0 = 4.8228e-04
Loss = 1.7029e-02, PNorm = 154.6696, GNorm = 0.3717, lr_0 = 4.8195e-04
Loss = 1.3952e-02, PNorm = 154.7073, GNorm = 0.3003, lr_0 = 4.8162e-04
Loss = 1.4378e-02, PNorm = 154.7463, GNorm = 0.2384, lr_0 = 4.8129e-04
Loss = 1.4711e-02, PNorm = 154.7860, GNorm = 0.3517, lr_0 = 4.8096e-04
Loss = 1.4379e-02, PNorm = 154.8214, GNorm = 0.1229, lr_0 = 4.8064e-04
Loss = 1.9605e-02, PNorm = 154.8551, GNorm = 0.2181, lr_0 = 4.8031e-04
Loss = 1.3688e-02, PNorm = 154.8874, GNorm = 0.3513, lr_0 = 4.7998e-04
Loss = 1.6335e-02, PNorm = 154.9184, GNorm = 0.4211, lr_0 = 4.7965e-04
Loss = 1.4681e-02, PNorm = 154.9545, GNorm = 0.1863, lr_0 = 4.7932e-04
Loss = 2.1288e-02, PNorm = 154.9957, GNorm = 0.3350, lr_0 = 4.7899e-04
Loss = 1.5054e-02, PNorm = 155.0300, GNorm = 0.2004, lr_0 = 4.7866e-04
Loss = 1.4500e-02, PNorm = 155.0646, GNorm = 0.4897, lr_0 = 4.7833e-04
Loss = 1.8481e-02, PNorm = 155.1005, GNorm = 0.5480, lr_0 = 4.7801e-04
Loss = 1.4872e-02, PNorm = 155.1395, GNorm = 0.1914, lr_0 = 4.7768e-04
Loss = 1.4456e-02, PNorm = 155.1758, GNorm = 0.2346, lr_0 = 4.7735e-04
Loss = 1.3250e-02, PNorm = 155.2120, GNorm = 0.1999, lr_0 = 4.7703e-04
Validation mae = 0.281289
Epoch 11
Loss = 1.1579e-02, PNorm = 155.2413, GNorm = 0.3123, lr_0 = 4.7670e-04
Loss = 1.5379e-02, PNorm = 155.2633, GNorm = 0.2766, lr_0 = 4.7637e-04
Loss = 1.6928e-02, PNorm = 155.2844, GNorm = 0.3673, lr_0 = 4.7605e-04
Loss = 1.1656e-02, PNorm = 155.3109, GNorm = 0.4070, lr_0 = 4.7572e-04
Loss = 1.1551e-02, PNorm = 155.3364, GNorm = 0.1425, lr_0 = 4.7539e-04
Loss = 1.1954e-02, PNorm = 155.3595, GNorm = 0.3368, lr_0 = 4.7507e-04
Loss = 1.4667e-02, PNorm = 155.3842, GNorm = 0.2405, lr_0 = 4.7474e-04
Loss = 1.1193e-02, PNorm = 155.4083, GNorm = 0.4077, lr_0 = 4.7442e-04
Loss = 1.4423e-02, PNorm = 155.4380, GNorm = 0.8000, lr_0 = 4.7409e-04
Loss = 1.3591e-02, PNorm = 155.4628, GNorm = 0.2366, lr_0 = 4.7377e-04
Loss = 1.1348e-02, PNorm = 155.4837, GNorm = 0.3685, lr_0 = 4.7344e-04
Loss = 1.2725e-02, PNorm = 155.5049, GNorm = 0.2016, lr_0 = 4.7312e-04
Loss = 1.2056e-02, PNorm = 155.5266, GNorm = 0.3709, lr_0 = 4.7279e-04
Loss = 1.2046e-02, PNorm = 155.5536, GNorm = 0.1386, lr_0 = 4.7247e-04
Loss = 1.5685e-02, PNorm = 155.5728, GNorm = 0.2694, lr_0 = 4.7215e-04
Loss = 1.1080e-02, PNorm = 155.5939, GNorm = 0.2324, lr_0 = 4.7182e-04
Loss = 1.2199e-02, PNorm = 155.6174, GNorm = 0.2978, lr_0 = 4.7150e-04
Loss = 9.9052e-03, PNorm = 155.6423, GNorm = 0.1239, lr_0 = 4.7118e-04
Loss = 1.0634e-02, PNorm = 155.6717, GNorm = 0.2480, lr_0 = 4.7085e-04
Loss = 1.1346e-02, PNorm = 155.6981, GNorm = 0.3009, lr_0 = 4.7053e-04
Loss = 8.6325e-03, PNorm = 155.7213, GNorm = 0.1599, lr_0 = 4.7021e-04
Loss = 1.2651e-02, PNorm = 155.7394, GNorm = 0.2721, lr_0 = 4.6989e-04
Loss = 1.2755e-02, PNorm = 155.7630, GNorm = 0.3376, lr_0 = 4.6957e-04
Loss = 1.1441e-02, PNorm = 155.7861, GNorm = 0.2456, lr_0 = 4.6924e-04
Loss = 1.1906e-02, PNorm = 155.8145, GNorm = 0.2351, lr_0 = 4.6892e-04
Loss = 1.0023e-02, PNorm = 155.8413, GNorm = 0.4758, lr_0 = 4.6860e-04
Loss = 1.0993e-02, PNorm = 155.8634, GNorm = 0.3014, lr_0 = 4.6828e-04
Loss = 1.1344e-02, PNorm = 155.8819, GNorm = 0.3079, lr_0 = 4.6796e-04
Loss = 1.4772e-02, PNorm = 155.9017, GNorm = 0.5042, lr_0 = 4.6764e-04
Loss = 9.9284e-03, PNorm = 155.9257, GNorm = 0.4103, lr_0 = 4.6732e-04
Loss = 1.1926e-02, PNorm = 155.9509, GNorm = 0.2199, lr_0 = 4.6700e-04
Loss = 1.2231e-02, PNorm = 155.9780, GNorm = 0.6852, lr_0 = 4.6668e-04
Loss = 1.0457e-02, PNorm = 156.0015, GNorm = 0.3863, lr_0 = 4.6636e-04
Loss = 1.2957e-02, PNorm = 156.0211, GNorm = 0.3771, lr_0 = 4.6604e-04
Loss = 1.1757e-02, PNorm = 156.0458, GNorm = 0.3312, lr_0 = 4.6572e-04
Loss = 1.0177e-02, PNorm = 156.0697, GNorm = 0.3797, lr_0 = 4.6540e-04
Loss = 1.1730e-02, PNorm = 156.0921, GNorm = 0.2718, lr_0 = 4.6508e-04
Loss = 1.2382e-02, PNorm = 156.1125, GNorm = 0.2770, lr_0 = 4.6476e-04
Loss = 1.1680e-02, PNorm = 156.1330, GNorm = 0.5471, lr_0 = 4.6445e-04
Loss = 1.3047e-02, PNorm = 156.1585, GNorm = 0.3396, lr_0 = 4.6413e-04
Loss = 1.1756e-02, PNorm = 156.1818, GNorm = 0.1256, lr_0 = 4.6381e-04
Loss = 9.6936e-03, PNorm = 156.2081, GNorm = 0.4327, lr_0 = 4.6349e-04
Loss = 1.1198e-02, PNorm = 156.2346, GNorm = 0.4181, lr_0 = 4.6317e-04
Loss = 1.1355e-02, PNorm = 156.2653, GNorm = 0.4771, lr_0 = 4.6286e-04
Loss = 1.1376e-02, PNorm = 156.2929, GNorm = 0.2657, lr_0 = 4.6254e-04
Loss = 1.2439e-02, PNorm = 156.3166, GNorm = 0.1974, lr_0 = 4.6222e-04
Loss = 1.1699e-02, PNorm = 156.3449, GNorm = 0.2284, lr_0 = 4.6191e-04
Loss = 1.1357e-02, PNorm = 156.3736, GNorm = 0.2439, lr_0 = 4.6159e-04
Loss = 1.2338e-02, PNorm = 156.4020, GNorm = 0.4708, lr_0 = 4.6127e-04
Loss = 1.0540e-02, PNorm = 156.4265, GNorm = 0.4776, lr_0 = 4.6096e-04
Loss = 1.0352e-02, PNorm = 156.4490, GNorm = 0.4021, lr_0 = 4.6064e-04
Loss = 1.4695e-02, PNorm = 156.4736, GNorm = 0.1184, lr_0 = 4.6033e-04
Loss = 1.1528e-02, PNorm = 156.5023, GNorm = 0.1114, lr_0 = 4.6001e-04
Loss = 1.5462e-02, PNorm = 156.5295, GNorm = 0.9775, lr_0 = 4.5970e-04
Loss = 1.0494e-02, PNorm = 156.5587, GNorm = 0.3201, lr_0 = 4.5938e-04
Loss = 1.3177e-02, PNorm = 156.5840, GNorm = 0.4272, lr_0 = 4.5907e-04
Loss = 1.1216e-02, PNorm = 156.6094, GNorm = 0.1486, lr_0 = 4.5875e-04
Loss = 1.3607e-02, PNorm = 156.6307, GNorm = 0.2581, lr_0 = 4.5844e-04
Loss = 9.9004e-03, PNorm = 156.6530, GNorm = 0.4876, lr_0 = 4.5812e-04
Loss = 1.2705e-02, PNorm = 156.6782, GNorm = 0.2874, lr_0 = 4.5781e-04
Loss = 1.3441e-02, PNorm = 156.7051, GNorm = 0.3652, lr_0 = 4.5750e-04
Loss = 1.0207e-02, PNorm = 156.7325, GNorm = 0.5098, lr_0 = 4.5718e-04
Loss = 1.0971e-02, PNorm = 156.7592, GNorm = 0.3417, lr_0 = 4.5687e-04
Loss = 1.1927e-02, PNorm = 156.7823, GNorm = 0.3182, lr_0 = 4.5656e-04
Loss = 1.1735e-02, PNorm = 156.8084, GNorm = 0.1062, lr_0 = 4.5624e-04
Loss = 1.2782e-02, PNorm = 156.8355, GNorm = 0.1068, lr_0 = 4.5593e-04
Loss = 1.1570e-02, PNorm = 156.8603, GNorm = 0.6102, lr_0 = 4.5562e-04
Loss = 1.5417e-02, PNorm = 156.8882, GNorm = 0.4436, lr_0 = 4.5531e-04
Loss = 1.2924e-02, PNorm = 156.9137, GNorm = 0.4797, lr_0 = 4.5499e-04
Loss = 1.1789e-02, PNorm = 156.9427, GNorm = 0.2546, lr_0 = 4.5468e-04
Loss = 1.0910e-02, PNorm = 156.9739, GNorm = 0.2880, lr_0 = 4.5437e-04
Loss = 1.2476e-02, PNorm = 157.0047, GNorm = 0.5794, lr_0 = 4.5406e-04
Loss = 1.2057e-02, PNorm = 157.0353, GNorm = 0.1575, lr_0 = 4.5375e-04
Loss = 1.4072e-02, PNorm = 157.0636, GNorm = 0.2644, lr_0 = 4.5344e-04
Loss = 1.0480e-02, PNorm = 157.0893, GNorm = 0.2387, lr_0 = 4.5313e-04
Loss = 9.6797e-03, PNorm = 157.1179, GNorm = 0.3823, lr_0 = 4.5282e-04
Loss = 1.4306e-02, PNorm = 157.1462, GNorm = 0.3199, lr_0 = 4.5251e-04
Loss = 1.0866e-02, PNorm = 157.1725, GNorm = 0.4039, lr_0 = 4.5220e-04
Loss = 9.6304e-03, PNorm = 157.1992, GNorm = 0.1324, lr_0 = 4.5189e-04
Loss = 1.4555e-02, PNorm = 157.2257, GNorm = 0.3360, lr_0 = 4.5158e-04
Loss = 1.2419e-02, PNorm = 157.2483, GNorm = 0.1796, lr_0 = 4.5127e-04
Loss = 1.1142e-02, PNorm = 157.2733, GNorm = 0.4662, lr_0 = 4.5096e-04
Loss = 1.3625e-02, PNorm = 157.2995, GNorm = 0.3343, lr_0 = 4.5065e-04
Loss = 1.5590e-02, PNorm = 157.3300, GNorm = 0.2538, lr_0 = 4.5034e-04
Loss = 1.0343e-02, PNorm = 157.3553, GNorm = 0.3756, lr_0 = 4.5003e-04
Loss = 1.1406e-02, PNorm = 157.3829, GNorm = 0.1806, lr_0 = 4.4972e-04
Loss = 1.0611e-02, PNorm = 157.4056, GNorm = 0.1601, lr_0 = 4.4942e-04
Loss = 1.0807e-02, PNorm = 157.4330, GNorm = 0.2386, lr_0 = 4.4911e-04
Loss = 1.0413e-02, PNorm = 157.4623, GNorm = 0.1154, lr_0 = 4.4880e-04
Loss = 1.3108e-02, PNorm = 157.4855, GNorm = 0.4946, lr_0 = 4.4849e-04
Loss = 1.3920e-02, PNorm = 157.5120, GNorm = 0.2769, lr_0 = 4.4819e-04
Loss = 1.0566e-02, PNorm = 157.5395, GNorm = 0.2913, lr_0 = 4.4788e-04
Loss = 1.0115e-02, PNorm = 157.5658, GNorm = 0.5331, lr_0 = 4.4757e-04
Loss = 1.3237e-02, PNorm = 157.5939, GNorm = 0.3528, lr_0 = 4.4727e-04
Loss = 1.1723e-02, PNorm = 157.6229, GNorm = 0.2947, lr_0 = 4.4696e-04
Loss = 1.1635e-02, PNorm = 157.6500, GNorm = 0.4327, lr_0 = 4.4665e-04
Loss = 9.8713e-03, PNorm = 157.6769, GNorm = 0.2128, lr_0 = 4.4635e-04
Loss = 1.1183e-02, PNorm = 157.7042, GNorm = 0.3254, lr_0 = 4.4604e-04
Loss = 1.0697e-02, PNorm = 157.7312, GNorm = 0.1582, lr_0 = 4.4574e-04
Loss = 1.1160e-02, PNorm = 157.7582, GNorm = 0.2693, lr_0 = 4.4543e-04
Loss = 1.2404e-02, PNorm = 157.7826, GNorm = 0.3036, lr_0 = 4.4513e-04
Loss = 1.1007e-02, PNorm = 157.8096, GNorm = 0.1856, lr_0 = 4.4482e-04
Loss = 1.1104e-02, PNorm = 157.8412, GNorm = 0.5346, lr_0 = 4.4452e-04
Loss = 1.1605e-02, PNorm = 157.8749, GNorm = 0.1808, lr_0 = 4.4421e-04
Loss = 1.1324e-02, PNorm = 157.9045, GNorm = 0.1524, lr_0 = 4.4391e-04
Loss = 9.9088e-03, PNorm = 157.9306, GNorm = 0.3766, lr_0 = 4.4360e-04
Loss = 1.1382e-02, PNorm = 157.9570, GNorm = 0.1289, lr_0 = 4.4330e-04
Loss = 1.0466e-02, PNorm = 157.9879, GNorm = 0.2949, lr_0 = 4.4299e-04
Loss = 1.1902e-02, PNorm = 158.0143, GNorm = 0.2571, lr_0 = 4.4269e-04
Loss = 1.3618e-02, PNorm = 158.0448, GNorm = 0.3793, lr_0 = 4.4239e-04
Loss = 1.3055e-02, PNorm = 158.0744, GNorm = 0.5315, lr_0 = 4.4209e-04
Loss = 1.2991e-02, PNorm = 158.0997, GNorm = 0.2869, lr_0 = 4.4178e-04
Loss = 1.1186e-02, PNorm = 158.1224, GNorm = 0.4384, lr_0 = 4.4148e-04
Loss = 9.5114e-03, PNorm = 158.1481, GNorm = 0.1708, lr_0 = 4.4118e-04
Loss = 1.1955e-02, PNorm = 158.1715, GNorm = 0.1834, lr_0 = 4.4088e-04
Loss = 1.0615e-02, PNorm = 158.1964, GNorm = 0.2492, lr_0 = 4.4057e-04
Loss = 1.4087e-02, PNorm = 158.2213, GNorm = 0.2225, lr_0 = 4.4027e-04
Loss = 1.7940e-02, PNorm = 158.2514, GNorm = 0.9338, lr_0 = 4.3997e-04
Loss = 1.3292e-02, PNorm = 158.2751, GNorm = 0.2916, lr_0 = 4.3967e-04
Loss = 9.8409e-03, PNorm = 158.3052, GNorm = 0.5069, lr_0 = 4.3937e-04
Validation mae = 0.281052
Epoch 12
Loss = 1.1460e-02, PNorm = 158.3289, GNorm = 0.2052, lr_0 = 4.3907e-04
Loss = 9.4080e-03, PNorm = 158.3486, GNorm = 0.2743, lr_0 = 4.3877e-04
Loss = 1.0431e-02, PNorm = 158.3688, GNorm = 0.1147, lr_0 = 4.3846e-04
Loss = 1.1508e-02, PNorm = 158.3890, GNorm = 0.2700, lr_0 = 4.3816e-04
Loss = 1.0159e-02, PNorm = 158.4088, GNorm = 0.3748, lr_0 = 4.3786e-04
Loss = 1.0961e-02, PNorm = 158.4282, GNorm = 0.7181, lr_0 = 4.3756e-04
Loss = 1.0145e-02, PNorm = 158.4481, GNorm = 0.2735, lr_0 = 4.3726e-04
Loss = 1.6104e-02, PNorm = 158.4648, GNorm = 0.5882, lr_0 = 4.3696e-04
Loss = 1.0751e-02, PNorm = 158.4905, GNorm = 0.1971, lr_0 = 4.3667e-04
Loss = 1.0115e-02, PNorm = 158.5138, GNorm = 0.2000, lr_0 = 4.3637e-04
Loss = 1.0422e-02, PNorm = 158.5378, GNorm = 0.5265, lr_0 = 4.3607e-04
Loss = 9.2826e-03, PNorm = 158.5558, GNorm = 0.5726, lr_0 = 4.3577e-04
Loss = 8.0578e-03, PNorm = 158.5786, GNorm = 0.3101, lr_0 = 4.3547e-04
Loss = 9.5184e-03, PNorm = 158.6023, GNorm = 0.1737, lr_0 = 4.3517e-04
Loss = 1.2133e-02, PNorm = 158.6214, GNorm = 0.7088, lr_0 = 4.3487e-04
Loss = 1.3630e-02, PNorm = 158.6448, GNorm = 0.3014, lr_0 = 4.3458e-04
Loss = 8.9466e-03, PNorm = 158.6675, GNorm = 0.3580, lr_0 = 4.3428e-04
Loss = 1.0732e-02, PNorm = 158.6825, GNorm = 0.3260, lr_0 = 4.3398e-04
Loss = 8.6602e-03, PNorm = 158.6968, GNorm = 0.2105, lr_0 = 4.3368e-04
Loss = 9.6832e-03, PNorm = 158.7168, GNorm = 0.2800, lr_0 = 4.3339e-04
Loss = 1.1777e-02, PNorm = 158.7359, GNorm = 0.1169, lr_0 = 4.3309e-04
Loss = 1.0384e-02, PNorm = 158.7536, GNorm = 0.6617, lr_0 = 4.3279e-04
Loss = 1.1596e-02, PNorm = 158.7749, GNorm = 0.2355, lr_0 = 4.3250e-04
Loss = 1.1796e-02, PNorm = 158.8034, GNorm = 0.7523, lr_0 = 4.3220e-04
Loss = 1.0655e-02, PNorm = 158.8256, GNorm = 0.2684, lr_0 = 4.3190e-04
Loss = 9.4987e-03, PNorm = 158.8495, GNorm = 0.1269, lr_0 = 4.3161e-04
Loss = 9.8057e-03, PNorm = 158.8720, GNorm = 0.1604, lr_0 = 4.3131e-04
Loss = 1.0598e-02, PNorm = 158.8937, GNorm = 0.5315, lr_0 = 4.3102e-04
Loss = 1.2270e-02, PNorm = 158.9143, GNorm = 0.2750, lr_0 = 4.3072e-04
Loss = 1.0632e-02, PNorm = 158.9304, GNorm = 0.1628, lr_0 = 4.3043e-04
Loss = 1.0086e-02, PNorm = 158.9484, GNorm = 0.1388, lr_0 = 4.3013e-04
Loss = 1.0548e-02, PNorm = 158.9655, GNorm = 0.1625, lr_0 = 4.2984e-04
Loss = 1.1819e-02, PNorm = 158.9882, GNorm = 0.1724, lr_0 = 4.2954e-04
Loss = 8.3626e-03, PNorm = 159.0113, GNorm = 0.1750, lr_0 = 4.2925e-04
Loss = 8.0039e-03, PNorm = 159.0310, GNorm = 0.1064, lr_0 = 4.2895e-04
Loss = 1.0348e-02, PNorm = 159.0499, GNorm = 0.1437, lr_0 = 4.2866e-04
Loss = 1.0497e-02, PNorm = 159.0709, GNorm = 0.2747, lr_0 = 4.2837e-04
Loss = 9.2065e-03, PNorm = 159.0885, GNorm = 0.1860, lr_0 = 4.2807e-04
Loss = 8.8935e-03, PNorm = 159.1088, GNorm = 0.4934, lr_0 = 4.2778e-04
Loss = 1.2302e-02, PNorm = 159.1285, GNorm = 0.1691, lr_0 = 4.2749e-04
Loss = 1.0428e-02, PNorm = 159.1487, GNorm = 0.3751, lr_0 = 4.2719e-04
Loss = 1.0432e-02, PNorm = 159.1712, GNorm = 0.2310, lr_0 = 4.2690e-04
Loss = 8.6660e-03, PNorm = 159.1955, GNorm = 0.2641, lr_0 = 4.2661e-04
Loss = 8.9681e-03, PNorm = 159.2151, GNorm = 0.2807, lr_0 = 4.2632e-04
Loss = 7.6516e-03, PNorm = 159.2289, GNorm = 0.5485, lr_0 = 4.2602e-04
Loss = 1.0227e-02, PNorm = 159.2515, GNorm = 0.2139, lr_0 = 4.2573e-04
Loss = 8.8931e-03, PNorm = 159.2745, GNorm = 0.3934, lr_0 = 4.2544e-04
Loss = 8.7219e-03, PNorm = 159.2909, GNorm = 0.1247, lr_0 = 4.2515e-04
Loss = 1.1597e-02, PNorm = 159.3109, GNorm = 0.1582, lr_0 = 4.2486e-04
Loss = 9.4715e-03, PNorm = 159.3280, GNorm = 0.1810, lr_0 = 4.2457e-04
Loss = 1.3143e-02, PNorm = 159.3468, GNorm = 0.2263, lr_0 = 4.2428e-04
Loss = 9.0803e-03, PNorm = 159.3687, GNorm = 0.1175, lr_0 = 4.2399e-04
Loss = 8.3060e-03, PNorm = 159.3895, GNorm = 0.2731, lr_0 = 4.2370e-04
Loss = 9.2005e-03, PNorm = 159.4095, GNorm = 0.4241, lr_0 = 4.2340e-04
Loss = 8.6507e-03, PNorm = 159.4310, GNorm = 0.6405, lr_0 = 4.2311e-04
Loss = 9.1290e-03, PNorm = 159.4505, GNorm = 0.1640, lr_0 = 4.2283e-04
Loss = 1.0662e-02, PNorm = 159.4678, GNorm = 0.3300, lr_0 = 4.2254e-04
Loss = 1.1084e-02, PNorm = 159.4875, GNorm = 0.2242, lr_0 = 4.2225e-04
Loss = 8.1581e-03, PNorm = 159.5101, GNorm = 0.1907, lr_0 = 4.2196e-04
Loss = 8.3739e-03, PNorm = 159.5323, GNorm = 0.1406, lr_0 = 4.2167e-04
Loss = 9.8326e-03, PNorm = 159.5524, GNorm = 0.4960, lr_0 = 4.2138e-04
Loss = 1.0764e-02, PNorm = 159.5708, GNorm = 0.1189, lr_0 = 4.2109e-04
Loss = 8.0199e-03, PNorm = 159.5915, GNorm = 0.3675, lr_0 = 4.2080e-04
Loss = 9.1849e-03, PNorm = 159.6150, GNorm = 0.4212, lr_0 = 4.2051e-04
Loss = 8.1113e-03, PNorm = 159.6375, GNorm = 0.1884, lr_0 = 4.2023e-04
Loss = 8.7244e-03, PNorm = 159.6592, GNorm = 0.3334, lr_0 = 4.1994e-04
Loss = 8.2947e-03, PNorm = 159.6780, GNorm = 0.2239, lr_0 = 4.1965e-04
Loss = 1.0237e-02, PNorm = 159.6968, GNorm = 0.2035, lr_0 = 4.1936e-04
Loss = 8.0966e-03, PNorm = 159.7167, GNorm = 0.3084, lr_0 = 4.1907e-04
Loss = 1.2326e-02, PNorm = 159.7365, GNorm = 0.1421, lr_0 = 4.1879e-04
Loss = 9.5760e-03, PNorm = 159.7600, GNorm = 0.2961, lr_0 = 4.1850e-04
Loss = 8.9557e-03, PNorm = 159.7825, GNorm = 0.2933, lr_0 = 4.1821e-04
Loss = 1.2684e-02, PNorm = 159.8063, GNorm = 0.1860, lr_0 = 4.1793e-04
Loss = 1.0925e-02, PNorm = 159.8308, GNorm = 0.3726, lr_0 = 4.1764e-04
Loss = 1.4506e-02, PNorm = 159.8513, GNorm = 0.3996, lr_0 = 4.1736e-04
Loss = 1.0901e-02, PNorm = 159.8754, GNorm = 0.3693, lr_0 = 4.1707e-04
Loss = 1.1235e-02, PNorm = 159.9041, GNorm = 0.3446, lr_0 = 4.1678e-04
Loss = 7.7059e-03, PNorm = 159.9299, GNorm = 0.2092, lr_0 = 4.1650e-04
Loss = 9.6835e-03, PNorm = 159.9510, GNorm = 0.5778, lr_0 = 4.1621e-04
Loss = 9.5526e-03, PNorm = 159.9680, GNorm = 0.1505, lr_0 = 4.1593e-04
Loss = 1.1167e-02, PNorm = 159.9875, GNorm = 0.1764, lr_0 = 4.1564e-04
Loss = 8.5652e-03, PNorm = 160.0089, GNorm = 0.2485, lr_0 = 4.1536e-04
Loss = 9.4475e-03, PNorm = 160.0287, GNorm = 0.3605, lr_0 = 4.1507e-04
Loss = 8.6898e-03, PNorm = 160.0515, GNorm = 0.1570, lr_0 = 4.1479e-04
Loss = 9.8010e-03, PNorm = 160.0752, GNorm = 0.1498, lr_0 = 4.1450e-04
Loss = 9.7934e-03, PNorm = 160.0969, GNorm = 0.3821, lr_0 = 4.1422e-04
Loss = 1.0754e-02, PNorm = 160.1196, GNorm = 0.1919, lr_0 = 4.1394e-04
Loss = 8.8109e-03, PNorm = 160.1455, GNorm = 0.1997, lr_0 = 4.1365e-04
Loss = 1.0020e-02, PNorm = 160.1712, GNorm = 0.1507, lr_0 = 4.1337e-04
Loss = 1.0258e-02, PNorm = 160.1933, GNorm = 0.0964, lr_0 = 4.1309e-04
Loss = 1.3026e-02, PNorm = 160.2152, GNorm = 0.4816, lr_0 = 4.1280e-04
Loss = 9.4238e-03, PNorm = 160.2383, GNorm = 0.2181, lr_0 = 4.1252e-04
Loss = 9.0666e-03, PNorm = 160.2604, GNorm = 0.4682, lr_0 = 4.1224e-04
Loss = 1.1535e-02, PNorm = 160.2801, GNorm = 0.3236, lr_0 = 4.1196e-04
Loss = 8.4812e-03, PNorm = 160.3039, GNorm = 0.2960, lr_0 = 4.1167e-04
Loss = 9.3540e-03, PNorm = 160.3306, GNorm = 0.2804, lr_0 = 4.1139e-04
Loss = 1.0915e-02, PNorm = 160.3537, GNorm = 0.7443, lr_0 = 4.1111e-04
Loss = 1.0357e-02, PNorm = 160.3712, GNorm = 0.1917, lr_0 = 4.1083e-04
Loss = 9.8816e-03, PNorm = 160.3922, GNorm = 0.5962, lr_0 = 4.1055e-04
Loss = 9.5519e-03, PNorm = 160.4151, GNorm = 0.1333, lr_0 = 4.1027e-04
Loss = 8.6711e-03, PNorm = 160.4417, GNorm = 0.2354, lr_0 = 4.0998e-04
Loss = 1.0245e-02, PNorm = 160.4672, GNorm = 0.1463, lr_0 = 4.0970e-04
Loss = 1.0047e-02, PNorm = 160.4895, GNorm = 0.1491, lr_0 = 4.0942e-04
Loss = 9.7718e-03, PNorm = 160.5120, GNorm = 0.1192, lr_0 = 4.0914e-04
Loss = 9.1449e-03, PNorm = 160.5360, GNorm = 0.2894, lr_0 = 4.0886e-04
Loss = 8.5220e-03, PNorm = 160.5597, GNorm = 0.1539, lr_0 = 4.0858e-04
Loss = 1.2597e-02, PNorm = 160.5843, GNorm = 0.4559, lr_0 = 4.0830e-04
Loss = 8.8319e-03, PNorm = 160.6070, GNorm = 0.2819, lr_0 = 4.0802e-04
Loss = 1.0255e-02, PNorm = 160.6266, GNorm = 0.3996, lr_0 = 4.0774e-04
Loss = 1.1058e-02, PNorm = 160.6494, GNorm = 0.2254, lr_0 = 4.0746e-04
Loss = 9.3366e-03, PNorm = 160.6740, GNorm = 0.2426, lr_0 = 4.0718e-04
Loss = 8.8382e-03, PNorm = 160.6983, GNorm = 0.3718, lr_0 = 4.0691e-04
Loss = 9.0418e-03, PNorm = 160.7218, GNorm = 0.1686, lr_0 = 4.0663e-04
Loss = 1.0715e-02, PNorm = 160.7422, GNorm = 0.2123, lr_0 = 4.0635e-04
Loss = 9.8726e-03, PNorm = 160.7613, GNorm = 0.1639, lr_0 = 4.0607e-04
Loss = 7.3890e-03, PNorm = 160.7803, GNorm = 0.1002, lr_0 = 4.0579e-04
Loss = 1.0076e-02, PNorm = 160.8003, GNorm = 0.4363, lr_0 = 4.0551e-04
Loss = 1.0269e-02, PNorm = 160.8210, GNorm = 0.3143, lr_0 = 4.0524e-04
Loss = 1.1439e-02, PNorm = 160.8434, GNorm = 0.2728, lr_0 = 4.0496e-04
Loss = 1.0777e-02, PNorm = 160.8692, GNorm = 0.3635, lr_0 = 4.0468e-04
Validation mae = 0.280015
Epoch 13
Loss = 9.1941e-03, PNorm = 160.8908, GNorm = 0.2589, lr_0 = 4.0440e-04
Loss = 8.7852e-03, PNorm = 160.9080, GNorm = 0.2685, lr_0 = 4.0413e-04
Loss = 8.1573e-03, PNorm = 160.9247, GNorm = 0.2458, lr_0 = 4.0385e-04
Loss = 8.4614e-03, PNorm = 160.9429, GNorm = 0.1233, lr_0 = 4.0357e-04
Loss = 8.9695e-03, PNorm = 160.9610, GNorm = 0.4345, lr_0 = 4.0330e-04
Loss = 8.8215e-03, PNorm = 160.9771, GNorm = 0.1092, lr_0 = 4.0302e-04
Loss = 8.8280e-03, PNorm = 160.9912, GNorm = 0.2556, lr_0 = 4.0274e-04
Loss = 9.9174e-03, PNorm = 161.0083, GNorm = 0.3517, lr_0 = 4.0247e-04
Loss = 9.7881e-03, PNorm = 161.0217, GNorm = 0.2528, lr_0 = 4.0219e-04
Loss = 8.5127e-03, PNorm = 161.0361, GNorm = 0.0799, lr_0 = 4.0192e-04
Loss = 1.0405e-02, PNorm = 161.0497, GNorm = 0.4349, lr_0 = 4.0164e-04
Loss = 1.0386e-02, PNorm = 161.0695, GNorm = 0.2626, lr_0 = 4.0137e-04
Loss = 7.9486e-03, PNorm = 161.0848, GNorm = 0.3108, lr_0 = 4.0109e-04
Loss = 8.0705e-03, PNorm = 161.1004, GNorm = 0.2086, lr_0 = 4.0082e-04
Loss = 7.8160e-03, PNorm = 161.1172, GNorm = 0.3547, lr_0 = 4.0054e-04
Loss = 8.1999e-03, PNorm = 161.1323, GNorm = 0.3803, lr_0 = 4.0027e-04
Loss = 7.5986e-03, PNorm = 161.1515, GNorm = 0.7126, lr_0 = 3.9999e-04
Loss = 7.5212e-03, PNorm = 161.1662, GNorm = 0.1745, lr_0 = 3.9972e-04
Loss = 9.2522e-03, PNorm = 161.1824, GNorm = 0.5352, lr_0 = 3.9945e-04
Loss = 7.9788e-03, PNorm = 161.1965, GNorm = 0.0755, lr_0 = 3.9917e-04
Loss = 8.0418e-03, PNorm = 161.2123, GNorm = 0.2316, lr_0 = 3.9890e-04
Loss = 9.0919e-03, PNorm = 161.2271, GNorm = 0.1889, lr_0 = 3.9863e-04
Loss = 1.0619e-02, PNorm = 161.2461, GNorm = 0.6712, lr_0 = 3.9835e-04
Loss = 9.9903e-03, PNorm = 161.2630, GNorm = 0.3083, lr_0 = 3.9808e-04
Loss = 7.7761e-03, PNorm = 161.2843, GNorm = 0.1383, lr_0 = 3.9781e-04
Loss = 7.8285e-03, PNorm = 161.3000, GNorm = 0.2215, lr_0 = 3.9753e-04
Loss = 1.1325e-02, PNorm = 161.3174, GNorm = 0.3481, lr_0 = 3.9726e-04
Loss = 8.5095e-03, PNorm = 161.3338, GNorm = 0.1224, lr_0 = 3.9699e-04
Loss = 8.2287e-03, PNorm = 161.3525, GNorm = 0.1419, lr_0 = 3.9672e-04
Loss = 8.2683e-03, PNorm = 161.3737, GNorm = 0.5736, lr_0 = 3.9645e-04
Loss = 9.2641e-03, PNorm = 161.3921, GNorm = 0.1444, lr_0 = 3.9617e-04
Loss = 8.7523e-03, PNorm = 161.4093, GNorm = 0.4834, lr_0 = 3.9590e-04
Loss = 9.7756e-03, PNorm = 161.4286, GNorm = 0.1717, lr_0 = 3.9563e-04
Loss = 8.3634e-03, PNorm = 161.4469, GNorm = 0.2792, lr_0 = 3.9536e-04
Loss = 1.0419e-02, PNorm = 161.4644, GNorm = 0.2316, lr_0 = 3.9509e-04
Loss = 9.5418e-03, PNorm = 161.4815, GNorm = 0.6011, lr_0 = 3.9482e-04
Loss = 1.1159e-02, PNorm = 161.4997, GNorm = 0.5630, lr_0 = 3.9455e-04
Loss = 8.2562e-03, PNorm = 161.5159, GNorm = 0.2161, lr_0 = 3.9428e-04
Loss = 1.0030e-02, PNorm = 161.5327, GNorm = 0.1658, lr_0 = 3.9401e-04
Loss = 1.0147e-02, PNorm = 161.5489, GNorm = 0.3722, lr_0 = 3.9374e-04
Loss = 7.4933e-03, PNorm = 161.5686, GNorm = 0.1987, lr_0 = 3.9347e-04
Loss = 7.0028e-03, PNorm = 161.5856, GNorm = 0.1051, lr_0 = 3.9320e-04
Loss = 6.6069e-03, PNorm = 161.6029, GNorm = 0.2764, lr_0 = 3.9293e-04
Loss = 8.0850e-03, PNorm = 161.6199, GNorm = 0.3171, lr_0 = 3.9266e-04
Loss = 9.3023e-03, PNorm = 161.6391, GNorm = 0.1511, lr_0 = 3.9239e-04
Loss = 8.6654e-03, PNorm = 161.6580, GNorm = 0.3559, lr_0 = 3.9212e-04
Loss = 1.1928e-02, PNorm = 161.6817, GNorm = 0.2599, lr_0 = 3.9185e-04
Loss = 1.1062e-02, PNorm = 161.7057, GNorm = 0.1013, lr_0 = 3.9159e-04
Loss = 7.8217e-03, PNorm = 161.7309, GNorm = 0.5409, lr_0 = 3.9132e-04
Loss = 9.5898e-03, PNorm = 161.7496, GNorm = 0.2213, lr_0 = 3.9105e-04
Loss = 8.4177e-03, PNorm = 161.7682, GNorm = 0.2585, lr_0 = 3.9078e-04
Loss = 1.1226e-02, PNorm = 161.7860, GNorm = 0.3152, lr_0 = 3.9051e-04
Loss = 1.1432e-02, PNorm = 161.8033, GNorm = 0.2197, lr_0 = 3.9025e-04
Loss = 8.5972e-03, PNorm = 161.8199, GNorm = 0.2694, lr_0 = 3.8998e-04
Loss = 9.1865e-03, PNorm = 161.8381, GNorm = 0.1299, lr_0 = 3.8971e-04
Loss = 7.8107e-03, PNorm = 161.8560, GNorm = 0.0752, lr_0 = 3.8945e-04
Loss = 7.7328e-03, PNorm = 161.8710, GNorm = 0.3363, lr_0 = 3.8918e-04
Loss = 7.8514e-03, PNorm = 161.8890, GNorm = 0.1983, lr_0 = 3.8891e-04
Loss = 6.7147e-03, PNorm = 161.9052, GNorm = 0.1022, lr_0 = 3.8865e-04
Loss = 6.6395e-03, PNorm = 161.9202, GNorm = 0.1688, lr_0 = 3.8838e-04
Loss = 7.1096e-03, PNorm = 161.9336, GNorm = 0.4319, lr_0 = 3.8811e-04
Loss = 9.4760e-03, PNorm = 161.9520, GNorm = 0.4680, lr_0 = 3.8785e-04
Loss = 1.0632e-02, PNorm = 161.9717, GNorm = 0.2284, lr_0 = 3.8758e-04
Loss = 8.8178e-03, PNorm = 161.9902, GNorm = 0.4149, lr_0 = 3.8732e-04
Loss = 7.2134e-03, PNorm = 162.0071, GNorm = 0.1225, lr_0 = 3.8705e-04
Loss = 7.7382e-03, PNorm = 162.0230, GNorm = 0.3419, lr_0 = 3.8679e-04
Loss = 6.9504e-03, PNorm = 162.0409, GNorm = 0.1822, lr_0 = 3.8652e-04
Loss = 8.5593e-03, PNorm = 162.0581, GNorm = 0.4986, lr_0 = 3.8626e-04
Loss = 8.1951e-03, PNorm = 162.0781, GNorm = 0.2190, lr_0 = 3.8599e-04
Loss = 8.2687e-03, PNorm = 162.0960, GNorm = 0.1267, lr_0 = 3.8573e-04
Loss = 8.8638e-03, PNorm = 162.1144, GNorm = 0.1400, lr_0 = 3.8546e-04
Loss = 7.2141e-03, PNorm = 162.1323, GNorm = 0.1745, lr_0 = 3.8520e-04
Loss = 7.8075e-03, PNorm = 162.1536, GNorm = 0.3211, lr_0 = 3.8493e-04
Loss = 7.5025e-03, PNorm = 162.1765, GNorm = 0.5674, lr_0 = 3.8467e-04
Loss = 9.9357e-03, PNorm = 162.1938, GNorm = 0.3093, lr_0 = 3.8441e-04
Loss = 7.2736e-03, PNorm = 162.2090, GNorm = 0.2308, lr_0 = 3.8414e-04
Loss = 7.3772e-03, PNorm = 162.2227, GNorm = 0.3064, lr_0 = 3.8388e-04
Loss = 9.4770e-03, PNorm = 162.2396, GNorm = 0.0941, lr_0 = 3.8362e-04
Loss = 8.7201e-03, PNorm = 162.2600, GNorm = 0.4577, lr_0 = 3.8336e-04
Loss = 8.6393e-03, PNorm = 162.2859, GNorm = 0.5550, lr_0 = 3.8309e-04
Loss = 7.1437e-03, PNorm = 162.3018, GNorm = 0.2968, lr_0 = 3.8283e-04
Loss = 7.9141e-03, PNorm = 162.3176, GNorm = 0.2404, lr_0 = 3.8257e-04
Loss = 7.1759e-03, PNorm = 162.3312, GNorm = 0.3250, lr_0 = 3.8231e-04
Loss = 7.1556e-03, PNorm = 162.3486, GNorm = 0.2427, lr_0 = 3.8204e-04
Loss = 7.7618e-03, PNorm = 162.3705, GNorm = 0.2165, lr_0 = 3.8178e-04
Loss = 9.0835e-03, PNorm = 162.3855, GNorm = 0.1741, lr_0 = 3.8152e-04
Loss = 7.2862e-03, PNorm = 162.4013, GNorm = 0.4047, lr_0 = 3.8126e-04
Loss = 6.6977e-03, PNorm = 162.4192, GNorm = 0.1484, lr_0 = 3.8100e-04
Loss = 8.6831e-03, PNorm = 162.4365, GNorm = 0.3594, lr_0 = 3.8074e-04
Loss = 8.8529e-03, PNorm = 162.4541, GNorm = 0.3657, lr_0 = 3.8048e-04
Loss = 9.5675e-03, PNorm = 162.4727, GNorm = 0.2252, lr_0 = 3.8022e-04
Loss = 9.5479e-03, PNorm = 162.4913, GNorm = 0.3516, lr_0 = 3.7995e-04
Loss = 1.1451e-02, PNorm = 162.5138, GNorm = 0.1811, lr_0 = 3.7969e-04
Loss = 1.0062e-02, PNorm = 162.5354, GNorm = 0.2093, lr_0 = 3.7943e-04
Loss = 9.0130e-03, PNorm = 162.5575, GNorm = 0.2573, lr_0 = 3.7917e-04
Loss = 8.0639e-03, PNorm = 162.5781, GNorm = 0.5049, lr_0 = 3.7891e-04
Loss = 8.1716e-03, PNorm = 162.6012, GNorm = 0.1952, lr_0 = 3.7866e-04
Loss = 7.7538e-03, PNorm = 162.6210, GNorm = 0.4261, lr_0 = 3.7840e-04
Loss = 9.2537e-03, PNorm = 162.6395, GNorm = 0.2769, lr_0 = 3.7814e-04
Loss = 7.6465e-03, PNorm = 162.6562, GNorm = 0.1839, lr_0 = 3.7788e-04
Loss = 8.1905e-03, PNorm = 162.6741, GNorm = 0.1048, lr_0 = 3.7762e-04
Loss = 1.0054e-02, PNorm = 162.6896, GNorm = 0.2589, lr_0 = 3.7736e-04
Loss = 7.6881e-03, PNorm = 162.7062, GNorm = 0.1768, lr_0 = 3.7710e-04
Loss = 8.8962e-03, PNorm = 162.7239, GNorm = 0.5208, lr_0 = 3.7684e-04
Loss = 8.4371e-03, PNorm = 162.7450, GNorm = 0.8159, lr_0 = 3.7659e-04
Loss = 1.0059e-02, PNorm = 162.7578, GNorm = 0.3788, lr_0 = 3.7633e-04
Loss = 8.1728e-03, PNorm = 162.7753, GNorm = 0.1392, lr_0 = 3.7607e-04
Loss = 6.8505e-03, PNorm = 162.7943, GNorm = 0.5015, lr_0 = 3.7581e-04
Loss = 1.3230e-02, PNorm = 162.8154, GNorm = 0.2478, lr_0 = 3.7555e-04
Loss = 1.1695e-02, PNorm = 162.8388, GNorm = 0.1751, lr_0 = 3.7530e-04
Loss = 8.1043e-03, PNorm = 162.8611, GNorm = 0.2390, lr_0 = 3.7504e-04
Loss = 7.0196e-03, PNorm = 162.8851, GNorm = 0.5227, lr_0 = 3.7478e-04
Loss = 8.1345e-03, PNorm = 162.9046, GNorm = 0.1803, lr_0 = 3.7453e-04
Loss = 7.8080e-03, PNorm = 162.9209, GNorm = 0.5264, lr_0 = 3.7427e-04
Loss = 1.0504e-02, PNorm = 162.9399, GNorm = 0.2886, lr_0 = 3.7401e-04
Loss = 7.6234e-03, PNorm = 162.9635, GNorm = 0.3256, lr_0 = 3.7376e-04
Loss = 8.9175e-03, PNorm = 162.9878, GNorm = 0.3162, lr_0 = 3.7350e-04
Loss = 6.8713e-03, PNorm = 163.0040, GNorm = 0.3301, lr_0 = 3.7325e-04
Loss = 8.1697e-03, PNorm = 163.0209, GNorm = 0.4323, lr_0 = 3.7299e-04
Loss = 9.6192e-03, PNorm = 163.0385, GNorm = 0.1472, lr_0 = 3.7273e-04
Validation mae = 0.279996
Epoch 14
Loss = 6.2261e-03, PNorm = 163.0562, GNorm = 0.2105, lr_0 = 3.7248e-04
Loss = 7.0277e-03, PNorm = 163.0726, GNorm = 0.1860, lr_0 = 3.7222e-04
Loss = 6.2738e-03, PNorm = 163.0862, GNorm = 0.1437, lr_0 = 3.7197e-04
Loss = 7.1802e-03, PNorm = 163.0988, GNorm = 0.2025, lr_0 = 3.7171e-04
Loss = 6.4678e-03, PNorm = 163.1135, GNorm = 0.2460, lr_0 = 3.7146e-04
Loss = 6.9355e-03, PNorm = 163.1246, GNorm = 0.2418, lr_0 = 3.7120e-04
Loss = 7.6541e-03, PNorm = 163.1397, GNorm = 0.2542, lr_0 = 3.7095e-04
Loss = 6.7644e-03, PNorm = 163.1578, GNorm = 0.2743, lr_0 = 3.7070e-04
Loss = 7.1047e-03, PNorm = 163.1695, GNorm = 0.0921, lr_0 = 3.7044e-04
Loss = 8.7187e-03, PNorm = 163.1767, GNorm = 0.5010, lr_0 = 3.7019e-04
Loss = 7.7943e-03, PNorm = 163.1907, GNorm = 0.3672, lr_0 = 3.6993e-04
Loss = 5.9439e-03, PNorm = 163.2032, GNorm = 0.1683, lr_0 = 3.6968e-04
Loss = 7.9251e-03, PNorm = 163.2173, GNorm = 0.1612, lr_0 = 3.6943e-04
Loss = 6.9680e-03, PNorm = 163.2278, GNorm = 0.2506, lr_0 = 3.6917e-04
Loss = 7.7998e-03, PNorm = 163.2397, GNorm = 0.2907, lr_0 = 3.6892e-04
Loss = 6.3004e-03, PNorm = 163.2541, GNorm = 0.2602, lr_0 = 3.6867e-04
Loss = 1.0327e-02, PNorm = 163.2715, GNorm = 0.4278, lr_0 = 3.6842e-04
Loss = 6.7274e-03, PNorm = 163.2853, GNorm = 0.3710, lr_0 = 3.6816e-04
Loss = 7.4657e-03, PNorm = 163.2985, GNorm = 0.2158, lr_0 = 3.6791e-04
Loss = 6.7803e-03, PNorm = 163.3080, GNorm = 0.1540, lr_0 = 3.6766e-04
Loss = 7.5579e-03, PNorm = 163.3204, GNorm = 0.1371, lr_0 = 3.6741e-04
Loss = 8.0971e-03, PNorm = 163.3367, GNorm = 0.4353, lr_0 = 3.6716e-04
Loss = 7.5274e-03, PNorm = 163.3510, GNorm = 0.1147, lr_0 = 3.6690e-04
Loss = 7.2797e-03, PNorm = 163.3660, GNorm = 0.3906, lr_0 = 3.6665e-04
Loss = 6.5020e-03, PNorm = 163.3787, GNorm = 0.1742, lr_0 = 3.6640e-04
Loss = 7.2550e-03, PNorm = 163.3926, GNorm = 0.2886, lr_0 = 3.6615e-04
Loss = 6.9717e-03, PNorm = 163.4045, GNorm = 0.1889, lr_0 = 3.6590e-04
Loss = 6.6504e-03, PNorm = 163.4213, GNorm = 0.1338, lr_0 = 3.6565e-04
Loss = 5.9462e-03, PNorm = 163.4342, GNorm = 0.2174, lr_0 = 3.6540e-04
Loss = 7.8330e-03, PNorm = 163.4456, GNorm = 0.2701, lr_0 = 3.6515e-04
Loss = 6.6580e-03, PNorm = 163.4556, GNorm = 0.2193, lr_0 = 3.6490e-04
Loss = 6.3425e-03, PNorm = 163.4684, GNorm = 0.2883, lr_0 = 3.6465e-04
Loss = 8.6090e-03, PNorm = 163.4850, GNorm = 0.4287, lr_0 = 3.6440e-04
Loss = 6.2021e-03, PNorm = 163.5014, GNorm = 0.1563, lr_0 = 3.6415e-04
Loss = 7.9188e-03, PNorm = 163.5150, GNorm = 0.4918, lr_0 = 3.6390e-04
Loss = 7.1490e-03, PNorm = 163.5309, GNorm = 0.2042, lr_0 = 3.6365e-04
Loss = 7.1112e-03, PNorm = 163.5466, GNorm = 0.3809, lr_0 = 3.6340e-04
Loss = 9.2910e-03, PNorm = 163.5649, GNorm = 0.3025, lr_0 = 3.6315e-04
Loss = 8.8173e-03, PNorm = 163.5861, GNorm = 0.1653, lr_0 = 3.6290e-04
Loss = 8.3824e-03, PNorm = 163.6052, GNorm = 0.2498, lr_0 = 3.6266e-04
Loss = 8.5551e-03, PNorm = 163.6228, GNorm = 0.2509, lr_0 = 3.6241e-04
Loss = 7.9222e-03, PNorm = 163.6352, GNorm = 0.2249, lr_0 = 3.6216e-04
Loss = 7.9965e-03, PNorm = 163.6455, GNorm = 0.1545, lr_0 = 3.6191e-04
Loss = 6.5562e-03, PNorm = 163.6566, GNorm = 0.2303, lr_0 = 3.6166e-04
Loss = 8.0618e-03, PNorm = 163.6751, GNorm = 0.1365, lr_0 = 3.6141e-04
Loss = 7.7625e-03, PNorm = 163.6931, GNorm = 0.4766, lr_0 = 3.6117e-04
Loss = 7.5445e-03, PNorm = 163.7108, GNorm = 0.4957, lr_0 = 3.6092e-04
Loss = 6.8806e-03, PNorm = 163.7223, GNorm = 0.3082, lr_0 = 3.6067e-04
Loss = 8.3373e-03, PNorm = 163.7408, GNorm = 1.3080, lr_0 = 3.6043e-04
Loss = 8.2216e-03, PNorm = 163.7573, GNorm = 0.3608, lr_0 = 3.6018e-04
Loss = 6.4309e-03, PNorm = 163.7732, GNorm = 0.1952, lr_0 = 3.5993e-04
Loss = 7.0922e-03, PNorm = 163.7899, GNorm = 0.2583, lr_0 = 3.5969e-04
Loss = 1.0449e-02, PNorm = 163.8052, GNorm = 0.1681, lr_0 = 3.5944e-04
Loss = 6.5733e-03, PNorm = 163.8249, GNorm = 0.0953, lr_0 = 3.5919e-04
Loss = 6.3791e-03, PNorm = 163.8421, GNorm = 0.1295, lr_0 = 3.5895e-04
Loss = 7.5079e-03, PNorm = 163.8590, GNorm = 0.3232, lr_0 = 3.5870e-04
Loss = 6.3970e-03, PNorm = 163.8774, GNorm = 0.4217, lr_0 = 3.5845e-04
Loss = 6.1869e-03, PNorm = 163.8962, GNorm = 0.2534, lr_0 = 3.5821e-04
Loss = 7.4420e-03, PNorm = 163.9115, GNorm = 0.3607, lr_0 = 3.5796e-04
Loss = 7.2190e-03, PNorm = 163.9239, GNorm = 0.1426, lr_0 = 3.5772e-04
Loss = 6.7729e-03, PNorm = 163.9381, GNorm = 0.1629, lr_0 = 3.5747e-04
Loss = 6.6938e-03, PNorm = 163.9520, GNorm = 0.1957, lr_0 = 3.5723e-04
Loss = 1.0641e-02, PNorm = 163.9679, GNorm = 0.3275, lr_0 = 3.5698e-04
Loss = 8.8584e-03, PNorm = 163.9836, GNorm = 0.4244, lr_0 = 3.5674e-04
Loss = 7.0338e-03, PNorm = 164.0009, GNorm = 0.1589, lr_0 = 3.5650e-04
Loss = 6.7180e-03, PNorm = 164.0165, GNorm = 0.1159, lr_0 = 3.5625e-04
Loss = 1.0885e-02, PNorm = 164.0320, GNorm = 0.1267, lr_0 = 3.5601e-04
Loss = 7.3564e-03, PNorm = 164.0495, GNorm = 0.1074, lr_0 = 3.5576e-04
Loss = 6.6890e-03, PNorm = 164.0646, GNorm = 0.2516, lr_0 = 3.5552e-04
Loss = 7.0904e-03, PNorm = 164.0837, GNorm = 0.5018, lr_0 = 3.5528e-04
Loss = 7.5273e-03, PNorm = 164.1009, GNorm = 0.1777, lr_0 = 3.5503e-04
Loss = 8.7744e-03, PNorm = 164.1151, GNorm = 0.3354, lr_0 = 3.5479e-04
Loss = 1.0898e-02, PNorm = 164.1300, GNorm = 0.2314, lr_0 = 3.5455e-04
Loss = 5.4774e-03, PNorm = 164.1470, GNorm = 0.2230, lr_0 = 3.5430e-04
Loss = 8.3599e-03, PNorm = 164.1641, GNorm = 0.0836, lr_0 = 3.5406e-04
Loss = 6.5190e-03, PNorm = 164.1792, GNorm = 0.2094, lr_0 = 3.5382e-04
Loss = 9.5091e-03, PNorm = 164.1940, GNorm = 0.1633, lr_0 = 3.5358e-04
Loss = 8.4190e-03, PNorm = 164.2111, GNorm = 0.3524, lr_0 = 3.5333e-04
Loss = 8.3058e-03, PNorm = 164.2284, GNorm = 0.3962, lr_0 = 3.5309e-04
Loss = 7.5288e-03, PNorm = 164.2419, GNorm = 0.1297, lr_0 = 3.5285e-04
Loss = 7.1457e-03, PNorm = 164.2594, GNorm = 0.3053, lr_0 = 3.5261e-04
Loss = 6.5860e-03, PNorm = 164.2760, GNorm = 0.1356, lr_0 = 3.5237e-04
Loss = 6.4778e-03, PNorm = 164.2933, GNorm = 0.1650, lr_0 = 3.5212e-04
Loss = 6.5572e-03, PNorm = 164.3107, GNorm = 0.0978, lr_0 = 3.5188e-04
Loss = 6.1087e-03, PNorm = 164.3278, GNorm = 0.1639, lr_0 = 3.5164e-04
Loss = 7.6415e-03, PNorm = 164.3438, GNorm = 0.2239, lr_0 = 3.5140e-04
Loss = 6.5110e-03, PNorm = 164.3585, GNorm = 0.2433, lr_0 = 3.5116e-04
Loss = 5.9763e-03, PNorm = 164.3687, GNorm = 0.4255, lr_0 = 3.5092e-04
Loss = 6.9773e-03, PNorm = 164.3824, GNorm = 0.3564, lr_0 = 3.5068e-04
Loss = 7.5569e-03, PNorm = 164.3988, GNorm = 0.1524, lr_0 = 3.5044e-04
Loss = 6.2166e-03, PNorm = 164.4162, GNorm = 0.1163, lr_0 = 3.5020e-04
Loss = 6.6440e-03, PNorm = 164.4340, GNorm = 0.3037, lr_0 = 3.4996e-04
Loss = 6.5983e-03, PNorm = 164.4548, GNorm = 0.3542, lr_0 = 3.4972e-04
Loss = 7.9221e-03, PNorm = 164.4705, GNorm = 0.7496, lr_0 = 3.4948e-04
Loss = 8.2179e-03, PNorm = 164.4866, GNorm = 0.1808, lr_0 = 3.4924e-04
Loss = 7.8083e-03, PNorm = 164.5023, GNorm = 0.5957, lr_0 = 3.4900e-04
Loss = 7.4512e-03, PNorm = 164.5175, GNorm = 0.2345, lr_0 = 3.4876e-04
Loss = 7.9479e-03, PNorm = 164.5355, GNorm = 0.1578, lr_0 = 3.4852e-04
Loss = 7.3600e-03, PNorm = 164.5553, GNorm = 0.1487, lr_0 = 3.4828e-04
Loss = 8.7440e-03, PNorm = 164.5711, GNorm = 0.2474, lr_0 = 3.4805e-04
Loss = 7.1587e-03, PNorm = 164.5905, GNorm = 0.0799, lr_0 = 3.4781e-04
Loss = 9.4305e-03, PNorm = 164.6095, GNorm = 0.1099, lr_0 = 3.4757e-04
Loss = 7.5240e-03, PNorm = 164.6285, GNorm = 0.2505, lr_0 = 3.4733e-04
Loss = 7.0203e-03, PNorm = 164.6468, GNorm = 0.2367, lr_0 = 3.4709e-04
Loss = 8.6494e-03, PNorm = 164.6644, GNorm = 0.5237, lr_0 = 3.4686e-04
Loss = 8.5935e-03, PNorm = 164.6817, GNorm = 0.1765, lr_0 = 3.4662e-04
Loss = 7.6655e-03, PNorm = 164.6982, GNorm = 0.1681, lr_0 = 3.4638e-04
Loss = 8.5300e-03, PNorm = 164.7154, GNorm = 0.1092, lr_0 = 3.4614e-04
Loss = 8.3386e-03, PNorm = 164.7310, GNorm = 0.2148, lr_0 = 3.4591e-04
Loss = 7.2917e-03, PNorm = 164.7456, GNorm = 0.4810, lr_0 = 3.4567e-04
Loss = 7.9117e-03, PNorm = 164.7616, GNorm = 0.2947, lr_0 = 3.4543e-04
Loss = 7.4206e-03, PNorm = 164.7795, GNorm = 0.1348, lr_0 = 3.4520e-04
Loss = 8.0243e-03, PNorm = 164.7982, GNorm = 0.7515, lr_0 = 3.4496e-04
Loss = 8.0041e-03, PNorm = 164.8139, GNorm = 0.1294, lr_0 = 3.4472e-04
Loss = 8.8373e-03, PNorm = 164.8313, GNorm = 0.1759, lr_0 = 3.4449e-04
Loss = 5.6998e-03, PNorm = 164.8491, GNorm = 0.1984, lr_0 = 3.4425e-04
Loss = 8.5770e-03, PNorm = 164.8649, GNorm = 0.1352, lr_0 = 3.4402e-04
Loss = 7.8234e-03, PNorm = 164.8778, GNorm = 0.0986, lr_0 = 3.4378e-04
Loss = 6.0721e-03, PNorm = 164.8918, GNorm = 0.1299, lr_0 = 3.4354e-04
Loss = 6.8241e-03, PNorm = 164.9070, GNorm = 0.3691, lr_0 = 3.4331e-04
Validation mae = 0.279842
Epoch 15
Loss = 5.5753e-03, PNorm = 164.9216, GNorm = 0.2734, lr_0 = 3.4307e-04
Loss = 6.6105e-03, PNorm = 164.9343, GNorm = 0.6340, lr_0 = 3.4284e-04
Loss = 6.7322e-03, PNorm = 164.9457, GNorm = 0.1944, lr_0 = 3.4260e-04
Loss = 6.7040e-03, PNorm = 164.9581, GNorm = 0.1797, lr_0 = 3.4237e-04
Loss = 6.4336e-03, PNorm = 164.9712, GNorm = 0.1481, lr_0 = 3.4213e-04
Loss = 5.5499e-03, PNorm = 164.9826, GNorm = 0.4696, lr_0 = 3.4190e-04
Loss = 6.4093e-03, PNorm = 164.9944, GNorm = 0.3995, lr_0 = 3.4167e-04
Loss = 5.8761e-03, PNorm = 165.0057, GNorm = 0.3060, lr_0 = 3.4143e-04
Loss = 7.1463e-03, PNorm = 165.0159, GNorm = 0.5472, lr_0 = 3.4120e-04
Loss = 7.2613e-03, PNorm = 165.0251, GNorm = 0.4270, lr_0 = 3.4096e-04
Loss = 7.3971e-03, PNorm = 165.0373, GNorm = 0.4441, lr_0 = 3.4073e-04
Loss = 5.3335e-03, PNorm = 165.0525, GNorm = 0.2301, lr_0 = 3.4050e-04
Loss = 7.3760e-03, PNorm = 165.0651, GNorm = 0.2080, lr_0 = 3.4026e-04
Loss = 6.0074e-03, PNorm = 165.0770, GNorm = 0.1206, lr_0 = 3.4003e-04
Loss = 5.8188e-03, PNorm = 165.0920, GNorm = 0.2074, lr_0 = 3.3980e-04
Loss = 7.1211e-03, PNorm = 165.1019, GNorm = 0.2360, lr_0 = 3.3956e-04
Loss = 6.3456e-03, PNorm = 165.1129, GNorm = 0.1797, lr_0 = 3.3933e-04
Loss = 7.5746e-03, PNorm = 165.1243, GNorm = 0.2361, lr_0 = 3.3910e-04
Loss = 7.8365e-03, PNorm = 165.1363, GNorm = 0.0977, lr_0 = 3.3887e-04
Loss = 6.5393e-03, PNorm = 165.1494, GNorm = 0.2135, lr_0 = 3.3864e-04
Loss = 6.1781e-03, PNorm = 165.1586, GNorm = 0.2904, lr_0 = 3.3840e-04
Loss = 6.4690e-03, PNorm = 165.1717, GNorm = 0.4259, lr_0 = 3.3817e-04
Loss = 5.6177e-03, PNorm = 165.1833, GNorm = 0.1350, lr_0 = 3.3794e-04
Loss = 7.1162e-03, PNorm = 165.1919, GNorm = 0.2275, lr_0 = 3.3771e-04
Loss = 7.8532e-03, PNorm = 165.2051, GNorm = 0.1704, lr_0 = 3.3748e-04
Loss = 4.6055e-03, PNorm = 165.2186, GNorm = 0.1108, lr_0 = 3.3725e-04
Loss = 6.3538e-03, PNorm = 165.2284, GNorm = 0.2944, lr_0 = 3.3701e-04
Loss = 8.2357e-03, PNorm = 165.2402, GNorm = 0.2881, lr_0 = 3.3678e-04
Loss = 5.6851e-03, PNorm = 165.2563, GNorm = 0.4127, lr_0 = 3.3655e-04
Loss = 5.9702e-03, PNorm = 165.2702, GNorm = 0.1763, lr_0 = 3.3632e-04
Loss = 5.4180e-03, PNorm = 165.2817, GNorm = 0.2489, lr_0 = 3.3609e-04
Loss = 6.5347e-03, PNorm = 165.2888, GNorm = 0.2034, lr_0 = 3.3586e-04
Loss = 5.7629e-03, PNorm = 165.3002, GNorm = 0.1664, lr_0 = 3.3563e-04
Loss = 6.0863e-03, PNorm = 165.3127, GNorm = 0.2445, lr_0 = 3.3540e-04
Loss = 5.3993e-03, PNorm = 165.3217, GNorm = 0.1253, lr_0 = 3.3517e-04
Loss = 8.4091e-03, PNorm = 165.3333, GNorm = 0.1704, lr_0 = 3.3494e-04
Loss = 5.4003e-03, PNorm = 165.3471, GNorm = 0.2842, lr_0 = 3.3471e-04
Loss = 6.6598e-03, PNorm = 165.3612, GNorm = 0.2636, lr_0 = 3.3448e-04
Loss = 5.5765e-03, PNorm = 165.3753, GNorm = 0.2142, lr_0 = 3.3425e-04
Loss = 6.1309e-03, PNorm = 165.3855, GNorm = 0.1513, lr_0 = 3.3403e-04
Loss = 7.2642e-03, PNorm = 165.3948, GNorm = 0.3343, lr_0 = 3.3380e-04
Loss = 5.4236e-03, PNorm = 165.4073, GNorm = 0.3262, lr_0 = 3.3357e-04
Loss = 6.1649e-03, PNorm = 165.4179, GNorm = 0.1692, lr_0 = 3.3334e-04
Loss = 6.3358e-03, PNorm = 165.4304, GNorm = 0.1015, lr_0 = 3.3311e-04
Loss = 5.9408e-03, PNorm = 165.4440, GNorm = 0.1796, lr_0 = 3.3288e-04
Loss = 8.0126e-03, PNorm = 165.4593, GNorm = 0.3954, lr_0 = 3.3265e-04
Loss = 9.0386e-03, PNorm = 165.4678, GNorm = 0.4890, lr_0 = 3.3243e-04
Loss = 6.3628e-03, PNorm = 165.4819, GNorm = 0.2638, lr_0 = 3.3220e-04
Loss = 6.9167e-03, PNorm = 165.4955, GNorm = 0.3876, lr_0 = 3.3197e-04
Loss = 7.2034e-03, PNorm = 165.5085, GNorm = 0.4154, lr_0 = 3.3174e-04
Loss = 8.1455e-03, PNorm = 165.5199, GNorm = 0.0586, lr_0 = 3.3152e-04
Loss = 4.8374e-03, PNorm = 165.5318, GNorm = 0.0823, lr_0 = 3.3129e-04
Loss = 7.0067e-03, PNorm = 165.5457, GNorm = 0.1099, lr_0 = 3.3106e-04
Loss = 4.9625e-03, PNorm = 165.5585, GNorm = 0.1957, lr_0 = 3.3084e-04
Loss = 6.6978e-03, PNorm = 165.5702, GNorm = 0.1451, lr_0 = 3.3061e-04
Loss = 5.7543e-03, PNorm = 165.5825, GNorm = 0.0760, lr_0 = 3.3038e-04
Loss = 4.9824e-03, PNorm = 165.5945, GNorm = 0.0728, lr_0 = 3.3016e-04
Loss = 6.6382e-03, PNorm = 165.6046, GNorm = 0.2141, lr_0 = 3.2993e-04
Loss = 5.0684e-03, PNorm = 165.6176, GNorm = 0.2449, lr_0 = 3.2970e-04
Loss = 5.0501e-03, PNorm = 165.6310, GNorm = 0.1291, lr_0 = 3.2948e-04
Loss = 4.8389e-03, PNorm = 165.6428, GNorm = 0.0987, lr_0 = 3.2925e-04
Loss = 6.0731e-03, PNorm = 165.6527, GNorm = 0.1045, lr_0 = 3.2903e-04
Loss = 5.9391e-03, PNorm = 165.6627, GNorm = 0.0878, lr_0 = 3.2880e-04
Loss = 6.3043e-03, PNorm = 165.6758, GNorm = 0.1512, lr_0 = 3.2858e-04
Loss = 5.2417e-03, PNorm = 165.6893, GNorm = 0.1800, lr_0 = 3.2835e-04
Loss = 4.6320e-03, PNorm = 165.7016, GNorm = 0.1164, lr_0 = 3.2813e-04
Loss = 6.4091e-03, PNorm = 165.7128, GNorm = 0.2271, lr_0 = 3.2790e-04
Loss = 5.3983e-03, PNorm = 165.7245, GNorm = 0.1518, lr_0 = 3.2768e-04
Loss = 6.7813e-03, PNorm = 165.7341, GNorm = 0.2121, lr_0 = 3.2745e-04
Loss = 5.6598e-03, PNorm = 165.7468, GNorm = 0.1755, lr_0 = 3.2723e-04
Loss = 9.6457e-03, PNorm = 165.7585, GNorm = 0.1913, lr_0 = 3.2700e-04
Loss = 4.5867e-03, PNorm = 165.7729, GNorm = 0.2642, lr_0 = 3.2678e-04
Loss = 5.3564e-03, PNorm = 165.7850, GNorm = 0.1762, lr_0 = 3.2656e-04
Loss = 6.4594e-03, PNorm = 165.7990, GNorm = 0.1610, lr_0 = 3.2633e-04
Loss = 5.6818e-03, PNorm = 165.8124, GNorm = 0.2931, lr_0 = 3.2611e-04
Loss = 6.8557e-03, PNorm = 165.8243, GNorm = 0.3308, lr_0 = 3.2589e-04
Loss = 4.4659e-03, PNorm = 165.8363, GNorm = 0.2918, lr_0 = 3.2566e-04
Loss = 5.8869e-03, PNorm = 165.8495, GNorm = 0.1573, lr_0 = 3.2544e-04
Loss = 5.9445e-03, PNorm = 165.8628, GNorm = 0.2454, lr_0 = 3.2522e-04
Loss = 6.9213e-03, PNorm = 165.8774, GNorm = 0.3286, lr_0 = 3.2499e-04
Loss = 5.3782e-03, PNorm = 165.8888, GNorm = 0.3104, lr_0 = 3.2477e-04
Loss = 5.8048e-03, PNorm = 165.9020, GNorm = 0.3568, lr_0 = 3.2455e-04
Loss = 5.8792e-03, PNorm = 165.9125, GNorm = 0.1671, lr_0 = 3.2433e-04
Loss = 5.8088e-03, PNorm = 165.9232, GNorm = 0.4464, lr_0 = 3.2410e-04
Loss = 5.3526e-03, PNorm = 165.9354, GNorm = 0.1233, lr_0 = 3.2388e-04
Loss = 6.2979e-03, PNorm = 165.9437, GNorm = 0.1498, lr_0 = 3.2366e-04
Loss = 5.8650e-03, PNorm = 165.9559, GNorm = 0.1507, lr_0 = 3.2344e-04
Loss = 7.2744e-03, PNorm = 165.9703, GNorm = 0.2995, lr_0 = 3.2322e-04
Loss = 6.2652e-03, PNorm = 165.9857, GNorm = 0.2484, lr_0 = 3.2300e-04
Loss = 5.3069e-03, PNorm = 165.9990, GNorm = 0.0845, lr_0 = 3.2277e-04
Loss = 6.3188e-03, PNorm = 166.0106, GNorm = 0.1321, lr_0 = 3.2255e-04
Loss = 6.5879e-03, PNorm = 166.0256, GNorm = 0.2142, lr_0 = 3.2233e-04
Loss = 6.4813e-03, PNorm = 166.0390, GNorm = 0.2012, lr_0 = 3.2211e-04
Loss = 5.5497e-03, PNorm = 166.0534, GNorm = 0.2694, lr_0 = 3.2189e-04
Loss = 7.6244e-03, PNorm = 166.0649, GNorm = 0.3099, lr_0 = 3.2167e-04
Loss = 5.8422e-03, PNorm = 166.0766, GNorm = 0.1828, lr_0 = 3.2145e-04
Loss = 5.6629e-03, PNorm = 166.0874, GNorm = 0.1710, lr_0 = 3.2123e-04
Loss = 5.7375e-03, PNorm = 166.0996, GNorm = 0.1647, lr_0 = 3.2101e-04
Loss = 4.8447e-03, PNorm = 166.1119, GNorm = 0.2390, lr_0 = 3.2079e-04
Loss = 5.2634e-03, PNorm = 166.1236, GNorm = 0.1676, lr_0 = 3.2057e-04
Loss = 8.5970e-03, PNorm = 166.1350, GNorm = 0.2298, lr_0 = 3.2035e-04
Loss = 6.0077e-03, PNorm = 166.1514, GNorm = 0.2371, lr_0 = 3.2013e-04
Loss = 8.3992e-03, PNorm = 166.1646, GNorm = 0.2292, lr_0 = 3.1991e-04
Loss = 6.9217e-03, PNorm = 166.1781, GNorm = 0.4082, lr_0 = 3.1969e-04
Loss = 5.5213e-03, PNorm = 166.1922, GNorm = 0.3109, lr_0 = 3.1947e-04
Loss = 7.4640e-03, PNorm = 166.2079, GNorm = 0.5308, lr_0 = 3.1925e-04
Loss = 6.0251e-03, PNorm = 166.2267, GNorm = 0.4059, lr_0 = 3.1904e-04
Loss = 6.8551e-03, PNorm = 166.2429, GNorm = 0.1918, lr_0 = 3.1882e-04
Loss = 7.6139e-03, PNorm = 166.2561, GNorm = 0.1560, lr_0 = 3.1860e-04
Loss = 5.7571e-03, PNorm = 166.2703, GNorm = 0.1670, lr_0 = 3.1838e-04
Loss = 7.1924e-03, PNorm = 166.2849, GNorm = 0.3571, lr_0 = 3.1816e-04
Loss = 7.1973e-03, PNorm = 166.2994, GNorm = 0.1128, lr_0 = 3.1794e-04
Loss = 5.9972e-03, PNorm = 166.3114, GNorm = 0.2047, lr_0 = 3.1773e-04
Loss = 7.7783e-03, PNorm = 166.3247, GNorm = 0.0936, lr_0 = 3.1751e-04
Loss = 5.8120e-03, PNorm = 166.3391, GNorm = 0.5245, lr_0 = 3.1729e-04
Loss = 6.1355e-03, PNorm = 166.3519, GNorm = 0.0997, lr_0 = 3.1707e-04
Loss = 6.6061e-03, PNorm = 166.3654, GNorm = 0.2884, lr_0 = 3.1686e-04
Loss = 6.4669e-03, PNorm = 166.3820, GNorm = 0.2910, lr_0 = 3.1664e-04
Loss = 5.9215e-03, PNorm = 166.3962, GNorm = 0.5455, lr_0 = 3.1642e-04
Loss = 6.2569e-03, PNorm = 166.4127, GNorm = 0.0699, lr_0 = 3.1621e-04
Validation mae = 0.278828
Epoch 16
Loss = 5.1092e-03, PNorm = 166.4220, GNorm = 0.2552, lr_0 = 3.1599e-04
Loss = 5.1333e-03, PNorm = 166.4320, GNorm = 0.2542, lr_0 = 3.1577e-04
Loss = 4.5760e-03, PNorm = 166.4401, GNorm = 0.1938, lr_0 = 3.1556e-04
Loss = 5.2237e-03, PNorm = 166.4456, GNorm = 0.1553, lr_0 = 3.1534e-04
Loss = 5.8743e-03, PNorm = 166.4534, GNorm = 0.2130, lr_0 = 3.1512e-04
Loss = 6.8409e-03, PNorm = 166.4610, GNorm = 0.1739, lr_0 = 3.1491e-04
Loss = 6.2265e-03, PNorm = 166.4723, GNorm = 0.2298, lr_0 = 3.1469e-04
Loss = 7.1148e-03, PNorm = 166.4792, GNorm = 0.3029, lr_0 = 3.1448e-04
Loss = 5.3694e-03, PNorm = 166.4866, GNorm = 0.2917, lr_0 = 3.1426e-04
Loss = 4.2603e-03, PNorm = 166.4956, GNorm = 0.0650, lr_0 = 3.1405e-04
Loss = 4.8975e-03, PNorm = 166.5050, GNorm = 0.2506, lr_0 = 3.1383e-04
Loss = 6.5159e-03, PNorm = 166.5157, GNorm = 0.2408, lr_0 = 3.1362e-04
Loss = 4.8710e-03, PNorm = 166.5277, GNorm = 0.1584, lr_0 = 3.1340e-04
Loss = 5.2169e-03, PNorm = 166.5378, GNorm = 0.1036, lr_0 = 3.1319e-04
Loss = 4.3795e-03, PNorm = 166.5471, GNorm = 0.1410, lr_0 = 3.1297e-04
Loss = 5.6828e-03, PNorm = 166.5535, GNorm = 0.1473, lr_0 = 3.1276e-04
Loss = 4.6481e-03, PNorm = 166.5609, GNorm = 0.1965, lr_0 = 3.1254e-04
Loss = 5.2088e-03, PNorm = 166.5681, GNorm = 0.1877, lr_0 = 3.1233e-04
Loss = 5.3173e-03, PNorm = 166.5811, GNorm = 0.2470, lr_0 = 3.1212e-04
Loss = 5.4763e-03, PNorm = 166.5932, GNorm = 0.2138, lr_0 = 3.1190e-04
Loss = 4.6640e-03, PNorm = 166.6055, GNorm = 0.1126, lr_0 = 3.1169e-04
Loss = 4.8144e-03, PNorm = 166.6159, GNorm = 0.3188, lr_0 = 3.1147e-04
Loss = 4.4747e-03, PNorm = 166.6226, GNorm = 0.0827, lr_0 = 3.1126e-04
Loss = 4.8392e-03, PNorm = 166.6327, GNorm = 0.2355, lr_0 = 3.1105e-04
Loss = 6.0621e-03, PNorm = 166.6420, GNorm = 0.2121, lr_0 = 3.1083e-04
Loss = 5.7460e-03, PNorm = 166.6531, GNorm = 0.2491, lr_0 = 3.1062e-04
Loss = 5.1978e-03, PNorm = 166.6638, GNorm = 0.2991, lr_0 = 3.1041e-04
Loss = 5.9305e-03, PNorm = 166.6705, GNorm = 0.0760, lr_0 = 3.1020e-04
Loss = 3.8169e-03, PNorm = 166.6794, GNorm = 0.2112, lr_0 = 3.0998e-04
Loss = 4.7628e-03, PNorm = 166.6887, GNorm = 0.2708, lr_0 = 3.0977e-04
Loss = 5.8490e-03, PNorm = 166.6989, GNorm = 0.1652, lr_0 = 3.0956e-04
Loss = 4.9557e-03, PNorm = 166.7078, GNorm = 0.1375, lr_0 = 3.0935e-04
Loss = 4.5262e-03, PNorm = 166.7158, GNorm = 0.3409, lr_0 = 3.0914e-04
Loss = 4.7915e-03, PNorm = 166.7263, GNorm = 0.1488, lr_0 = 3.0892e-04
Loss = 4.6639e-03, PNorm = 166.7378, GNorm = 0.1665, lr_0 = 3.0871e-04
Loss = 5.2810e-03, PNorm = 166.7491, GNorm = 0.1389, lr_0 = 3.0850e-04
Loss = 6.2048e-03, PNorm = 166.7631, GNorm = 0.5223, lr_0 = 3.0829e-04
Loss = 4.5559e-03, PNorm = 166.7722, GNorm = 0.1122, lr_0 = 3.0808e-04
Loss = 7.7478e-03, PNorm = 166.7853, GNorm = 0.3355, lr_0 = 3.0787e-04
Loss = 4.5943e-03, PNorm = 166.7936, GNorm = 0.4807, lr_0 = 3.0766e-04
Loss = 5.3619e-03, PNorm = 166.8034, GNorm = 0.2593, lr_0 = 3.0745e-04
Loss = 4.8478e-03, PNorm = 166.8124, GNorm = 0.0588, lr_0 = 3.0723e-04
Loss = 5.2857e-03, PNorm = 166.8225, GNorm = 0.1239, lr_0 = 3.0702e-04
Loss = 5.1329e-03, PNorm = 166.8302, GNorm = 0.1607, lr_0 = 3.0681e-04
Loss = 4.2075e-03, PNorm = 166.8402, GNorm = 0.1162, lr_0 = 3.0660e-04
Loss = 4.0251e-03, PNorm = 166.8524, GNorm = 0.2543, lr_0 = 3.0639e-04
Loss = 5.9978e-03, PNorm = 166.8616, GNorm = 0.6094, lr_0 = 3.0618e-04
Loss = 4.6075e-03, PNorm = 166.8709, GNorm = 0.2417, lr_0 = 3.0597e-04
Loss = 4.6725e-03, PNorm = 166.8802, GNorm = 0.2729, lr_0 = 3.0576e-04
Loss = 4.9672e-03, PNorm = 166.8890, GNorm = 0.1521, lr_0 = 3.0555e-04
Loss = 4.6334e-03, PNorm = 166.8990, GNorm = 0.2162, lr_0 = 3.0535e-04
Loss = 5.0727e-03, PNorm = 166.9083, GNorm = 0.1237, lr_0 = 3.0514e-04
Loss = 5.5982e-03, PNorm = 166.9179, GNorm = 0.2818, lr_0 = 3.0493e-04
Loss = 4.3103e-03, PNorm = 166.9253, GNorm = 0.1323, lr_0 = 3.0472e-04
Loss = 4.3691e-03, PNorm = 166.9340, GNorm = 0.2077, lr_0 = 3.0451e-04
Loss = 4.6644e-03, PNorm = 166.9424, GNorm = 0.1596, lr_0 = 3.0430e-04
Loss = 6.2860e-03, PNorm = 166.9508, GNorm = 0.1482, lr_0 = 3.0409e-04
Loss = 4.7875e-03, PNorm = 166.9606, GNorm = 0.2348, lr_0 = 3.0388e-04
Loss = 6.1277e-03, PNorm = 166.9710, GNorm = 0.2131, lr_0 = 3.0368e-04
Loss = 6.1186e-03, PNorm = 166.9850, GNorm = 0.2498, lr_0 = 3.0347e-04
Loss = 4.7565e-03, PNorm = 166.9945, GNorm = 0.2241, lr_0 = 3.0326e-04
Loss = 6.6409e-03, PNorm = 167.0047, GNorm = 0.4263, lr_0 = 3.0305e-04
Loss = 4.4063e-03, PNorm = 167.0133, GNorm = 0.2607, lr_0 = 3.0284e-04
Loss = 4.3211e-03, PNorm = 167.0237, GNorm = 0.1480, lr_0 = 3.0264e-04
Loss = 6.9707e-03, PNorm = 167.0365, GNorm = 0.1805, lr_0 = 3.0243e-04
Loss = 6.0788e-03, PNorm = 167.0462, GNorm = 0.3280, lr_0 = 3.0222e-04
Loss = 5.3714e-03, PNorm = 167.0563, GNorm = 0.3546, lr_0 = 3.0202e-04
Loss = 4.3762e-03, PNorm = 167.0671, GNorm = 0.1612, lr_0 = 3.0181e-04
Loss = 4.8504e-03, PNorm = 167.0750, GNorm = 0.2611, lr_0 = 3.0160e-04
Loss = 4.3775e-03, PNorm = 167.0853, GNorm = 0.2971, lr_0 = 3.0140e-04
Loss = 6.0506e-03, PNorm = 167.0941, GNorm = 0.4664, lr_0 = 3.0119e-04
Loss = 5.5945e-03, PNorm = 167.1029, GNorm = 0.1019, lr_0 = 3.0098e-04
Loss = 6.0774e-03, PNorm = 167.1174, GNorm = 0.1506, lr_0 = 3.0078e-04
Loss = 8.3215e-03, PNorm = 167.1285, GNorm = 0.5717, lr_0 = 3.0057e-04
Loss = 4.4027e-03, PNorm = 167.1420, GNorm = 0.0841, lr_0 = 3.0036e-04
Loss = 4.8368e-03, PNorm = 167.1534, GNorm = 0.2291, lr_0 = 3.0016e-04
Loss = 5.5347e-03, PNorm = 167.1641, GNorm = 0.1339, lr_0 = 2.9995e-04
Loss = 4.7197e-03, PNorm = 167.1752, GNorm = 0.1649, lr_0 = 2.9975e-04
Loss = 5.0821e-03, PNorm = 167.1855, GNorm = 0.1407, lr_0 = 2.9954e-04
Loss = 7.6786e-03, PNorm = 167.1968, GNorm = 0.2143, lr_0 = 2.9934e-04
Loss = 3.5881e-03, PNorm = 167.2058, GNorm = 0.4298, lr_0 = 2.9913e-04
Loss = 4.5867e-03, PNorm = 167.2150, GNorm = 0.4233, lr_0 = 2.9893e-04
Loss = 6.5344e-03, PNorm = 167.2259, GNorm = 0.2823, lr_0 = 2.9872e-04
Loss = 6.1976e-03, PNorm = 167.2356, GNorm = 0.2612, lr_0 = 2.9852e-04
Loss = 6.2320e-03, PNorm = 167.2478, GNorm = 0.0919, lr_0 = 2.9831e-04
Loss = 4.5591e-03, PNorm = 167.2613, GNorm = 0.4141, lr_0 = 2.9811e-04
Loss = 3.9632e-03, PNorm = 167.2727, GNorm = 0.2463, lr_0 = 2.9790e-04
Loss = 5.1037e-03, PNorm = 167.2823, GNorm = 0.1666, lr_0 = 2.9770e-04
Loss = 5.1302e-03, PNorm = 167.2930, GNorm = 0.2305, lr_0 = 2.9750e-04
Loss = 3.9889e-03, PNorm = 167.3011, GNorm = 0.0734, lr_0 = 2.9729e-04
Loss = 4.3587e-03, PNorm = 167.3092, GNorm = 0.2865, lr_0 = 2.9709e-04
Loss = 5.1615e-03, PNorm = 167.3186, GNorm = 0.1335, lr_0 = 2.9689e-04
Loss = 5.2315e-03, PNorm = 167.3265, GNorm = 0.5925, lr_0 = 2.9668e-04
Loss = 5.0642e-03, PNorm = 167.3363, GNorm = 0.1268, lr_0 = 2.9648e-04
Loss = 6.3789e-03, PNorm = 167.3444, GNorm = 0.2566, lr_0 = 2.9628e-04
Loss = 4.8502e-03, PNorm = 167.3569, GNorm = 0.2464, lr_0 = 2.9607e-04
Loss = 7.6343e-03, PNorm = 167.3682, GNorm = 0.2718, lr_0 = 2.9587e-04
Loss = 4.6246e-03, PNorm = 167.3801, GNorm = 0.1214, lr_0 = 2.9567e-04
Loss = 4.9236e-03, PNorm = 167.3921, GNorm = 0.3127, lr_0 = 2.9546e-04
Loss = 4.7182e-03, PNorm = 167.4026, GNorm = 0.1637, lr_0 = 2.9526e-04
Loss = 5.3956e-03, PNorm = 167.4121, GNorm = 0.1831, lr_0 = 2.9506e-04
Loss = 4.2220e-03, PNorm = 167.4216, GNorm = 0.3633, lr_0 = 2.9486e-04
Loss = 4.5638e-03, PNorm = 167.4324, GNorm = 0.1596, lr_0 = 2.9466e-04
Loss = 5.8916e-03, PNorm = 167.4421, GNorm = 0.1579, lr_0 = 2.9445e-04
Loss = 4.4367e-03, PNorm = 167.4533, GNorm = 0.2094, lr_0 = 2.9425e-04
Loss = 4.5633e-03, PNorm = 167.4643, GNorm = 0.2200, lr_0 = 2.9405e-04
Loss = 6.1027e-03, PNorm = 167.4739, GNorm = 0.3302, lr_0 = 2.9385e-04
Loss = 4.4044e-03, PNorm = 167.4837, GNorm = 0.0796, lr_0 = 2.9365e-04
Loss = 4.8331e-03, PNorm = 167.4950, GNorm = 0.2115, lr_0 = 2.9345e-04
Loss = 5.6575e-03, PNorm = 167.5049, GNorm = 0.3180, lr_0 = 2.9325e-04
Loss = 4.3411e-03, PNorm = 167.5181, GNorm = 0.1286, lr_0 = 2.9305e-04
Loss = 4.8795e-03, PNorm = 167.5324, GNorm = 0.2851, lr_0 = 2.9284e-04
Loss = 5.5985e-03, PNorm = 167.5501, GNorm = 0.1884, lr_0 = 2.9264e-04
Loss = 4.2694e-03, PNorm = 167.5638, GNorm = 0.1986, lr_0 = 2.9244e-04
Loss = 5.1024e-03, PNorm = 167.5724, GNorm = 0.2033, lr_0 = 2.9224e-04
Loss = 4.9706e-03, PNorm = 167.5796, GNorm = 0.1627, lr_0 = 2.9204e-04
Loss = 5.2488e-03, PNorm = 167.5878, GNorm = 0.1589, lr_0 = 2.9184e-04
Loss = 4.3751e-03, PNorm = 167.6006, GNorm = 0.2416, lr_0 = 2.9164e-04
Loss = 4.6340e-03, PNorm = 167.6133, GNorm = 0.1618, lr_0 = 2.9144e-04
Loss = 4.0898e-03, PNorm = 167.6249, GNorm = 0.1737, lr_0 = 2.9124e-04
Validation mae = 0.279216
Epoch 17
Loss = 3.7700e-03, PNorm = 167.6333, GNorm = 0.1784, lr_0 = 2.9104e-04
Loss = 5.1233e-03, PNorm = 167.6430, GNorm = 0.1615, lr_0 = 2.9084e-04
Loss = 4.1462e-03, PNorm = 167.6518, GNorm = 0.2216, lr_0 = 2.9065e-04
Loss = 4.3125e-03, PNorm = 167.6607, GNorm = 0.0878, lr_0 = 2.9045e-04
Loss = 4.2235e-03, PNorm = 167.6646, GNorm = 0.2363, lr_0 = 2.9025e-04
Loss = 4.5700e-03, PNorm = 167.6741, GNorm = 0.0634, lr_0 = 2.9005e-04
Loss = 7.6085e-03, PNorm = 167.6835, GNorm = 0.2125, lr_0 = 2.8985e-04
Loss = 5.0591e-03, PNorm = 167.6927, GNorm = 0.1627, lr_0 = 2.8965e-04
Loss = 4.0083e-03, PNorm = 167.6994, GNorm = 0.2605, lr_0 = 2.8945e-04
Loss = 4.1124e-03, PNorm = 167.7079, GNorm = 0.1793, lr_0 = 2.8925e-04
Loss = 4.8177e-03, PNorm = 167.7134, GNorm = 0.1620, lr_0 = 2.8906e-04
Loss = 4.0317e-03, PNorm = 167.7229, GNorm = 0.1504, lr_0 = 2.8886e-04
Loss = 6.6173e-03, PNorm = 167.7326, GNorm = 0.2292, lr_0 = 2.8866e-04
Loss = 4.0713e-03, PNorm = 167.7403, GNorm = 0.3371, lr_0 = 2.8846e-04
Loss = 3.5996e-03, PNorm = 167.7498, GNorm = 0.0920, lr_0 = 2.8826e-04
Loss = 3.7869e-03, PNorm = 167.7563, GNorm = 0.2118, lr_0 = 2.8807e-04
Loss = 4.7613e-03, PNorm = 167.7631, GNorm = 0.6290, lr_0 = 2.8787e-04
Loss = 4.4975e-03, PNorm = 167.7691, GNorm = 0.1554, lr_0 = 2.8767e-04
Loss = 4.6218e-03, PNorm = 167.7758, GNorm = 0.2786, lr_0 = 2.8748e-04
Loss = 5.3823e-03, PNorm = 167.7852, GNorm = 0.1913, lr_0 = 2.8728e-04
Loss = 4.3871e-03, PNorm = 167.7960, GNorm = 0.1638, lr_0 = 2.8708e-04
Loss = 4.1927e-03, PNorm = 167.8084, GNorm = 0.1679, lr_0 = 2.8689e-04
Loss = 4.2521e-03, PNorm = 167.8178, GNorm = 0.5110, lr_0 = 2.8669e-04
Loss = 4.6319e-03, PNorm = 167.8295, GNorm = 0.3100, lr_0 = 2.8649e-04
Loss = 6.0312e-03, PNorm = 167.8390, GNorm = 0.2605, lr_0 = 2.8630e-04
Loss = 4.5715e-03, PNorm = 167.8472, GNorm = 0.0615, lr_0 = 2.8610e-04
Loss = 4.0970e-03, PNorm = 167.8547, GNorm = 0.0733, lr_0 = 2.8590e-04
Loss = 5.0635e-03, PNorm = 167.8624, GNorm = 0.1083, lr_0 = 2.8571e-04
Loss = 4.6918e-03, PNorm = 167.8714, GNorm = 0.0849, lr_0 = 2.8551e-04
Loss = 4.0786e-03, PNorm = 167.8804, GNorm = 0.1210, lr_0 = 2.8532e-04
Loss = 3.3268e-03, PNorm = 167.8880, GNorm = 0.0766, lr_0 = 2.8512e-04
Loss = 4.9675e-03, PNorm = 167.8956, GNorm = 0.2062, lr_0 = 2.8493e-04
Loss = 4.8114e-03, PNorm = 167.9041, GNorm = 0.1680, lr_0 = 2.8473e-04
Loss = 4.4888e-03, PNorm = 167.9122, GNorm = 0.2339, lr_0 = 2.8454e-04
Loss = 3.4792e-03, PNorm = 167.9203, GNorm = 0.2310, lr_0 = 2.8434e-04
Loss = 4.5805e-03, PNorm = 167.9286, GNorm = 0.1197, lr_0 = 2.8415e-04
Loss = 7.0314e-03, PNorm = 167.9371, GNorm = 0.1494, lr_0 = 2.8395e-04
Loss = 4.9866e-03, PNorm = 167.9437, GNorm = 0.2196, lr_0 = 2.8376e-04
Loss = 4.0698e-03, PNorm = 167.9523, GNorm = 0.1309, lr_0 = 2.8356e-04
Loss = 3.6106e-03, PNorm = 167.9614, GNorm = 0.1617, lr_0 = 2.8337e-04
Loss = 4.7787e-03, PNorm = 167.9745, GNorm = 0.1439, lr_0 = 2.8317e-04
Loss = 4.2355e-03, PNorm = 167.9857, GNorm = 0.1099, lr_0 = 2.8298e-04
Loss = 3.9907e-03, PNorm = 167.9934, GNorm = 0.2023, lr_0 = 2.8279e-04
Loss = 3.5869e-03, PNorm = 168.0016, GNorm = 0.2272, lr_0 = 2.8259e-04
Loss = 4.5520e-03, PNorm = 168.0099, GNorm = 0.3069, lr_0 = 2.8240e-04
Loss = 5.2845e-03, PNorm = 168.0181, GNorm = 0.2565, lr_0 = 2.8221e-04
Loss = 4.2148e-03, PNorm = 168.0272, GNorm = 0.2433, lr_0 = 2.8201e-04
Loss = 7.6937e-03, PNorm = 168.0376, GNorm = 0.3981, lr_0 = 2.8182e-04
Loss = 4.0098e-03, PNorm = 168.0484, GNorm = 0.0716, lr_0 = 2.8163e-04
Loss = 5.1499e-03, PNorm = 168.0564, GNorm = 0.1416, lr_0 = 2.8143e-04
Loss = 3.6735e-03, PNorm = 168.0661, GNorm = 0.1634, lr_0 = 2.8124e-04
Loss = 3.8744e-03, PNorm = 168.0771, GNorm = 0.0552, lr_0 = 2.8105e-04
Loss = 4.1214e-03, PNorm = 168.0844, GNorm = 0.1045, lr_0 = 2.8085e-04
Loss = 3.6297e-03, PNorm = 168.0965, GNorm = 0.1612, lr_0 = 2.8066e-04
Loss = 6.3702e-03, PNorm = 168.1064, GNorm = 0.6857, lr_0 = 2.8047e-04
Loss = 4.8582e-03, PNorm = 168.1159, GNorm = 0.4388, lr_0 = 2.8028e-04
Loss = 5.1480e-03, PNorm = 168.1230, GNorm = 0.0652, lr_0 = 2.8009e-04
Loss = 4.4058e-03, PNorm = 168.1322, GNorm = 0.1178, lr_0 = 2.7989e-04
Loss = 7.6033e-03, PNorm = 168.1421, GNorm = 0.0867, lr_0 = 2.7970e-04
Loss = 5.1806e-03, PNorm = 168.1523, GNorm = 0.2797, lr_0 = 2.7951e-04
Loss = 3.7383e-03, PNorm = 168.1586, GNorm = 0.1226, lr_0 = 2.7932e-04
Loss = 5.1406e-03, PNorm = 168.1655, GNorm = 0.4989, lr_0 = 2.7913e-04
Loss = 4.1347e-03, PNorm = 168.1711, GNorm = 0.1751, lr_0 = 2.7894e-04
Loss = 3.8905e-03, PNorm = 168.1797, GNorm = 0.1628, lr_0 = 2.7875e-04
Loss = 4.5391e-03, PNorm = 168.1892, GNorm = 0.0758, lr_0 = 2.7855e-04
Loss = 4.5833e-03, PNorm = 168.2000, GNorm = 0.0652, lr_0 = 2.7836e-04
Loss = 3.8708e-03, PNorm = 168.2076, GNorm = 0.2400, lr_0 = 2.7817e-04
Loss = 5.4538e-03, PNorm = 168.2141, GNorm = 0.2273, lr_0 = 2.7798e-04
Loss = 3.8846e-03, PNorm = 168.2211, GNorm = 0.1702, lr_0 = 2.7779e-04
Loss = 3.7375e-03, PNorm = 168.2296, GNorm = 0.1225, lr_0 = 2.7760e-04
Loss = 5.6025e-03, PNorm = 168.2370, GNorm = 0.1425, lr_0 = 2.7741e-04
Loss = 4.0305e-03, PNorm = 168.2454, GNorm = 0.3250, lr_0 = 2.7722e-04
Loss = 4.1303e-03, PNorm = 168.2542, GNorm = 0.1245, lr_0 = 2.7703e-04
Loss = 4.6749e-03, PNorm = 168.2641, GNorm = 0.1905, lr_0 = 2.7684e-04
Loss = 4.5314e-03, PNorm = 168.2747, GNorm = 0.1908, lr_0 = 2.7665e-04
Loss = 5.4049e-03, PNorm = 168.2860, GNorm = 0.2547, lr_0 = 2.7646e-04
Loss = 5.4598e-03, PNorm = 168.2964, GNorm = 0.2101, lr_0 = 2.7627e-04
Loss = 4.9087e-03, PNorm = 168.3062, GNorm = 0.3089, lr_0 = 2.7608e-04
Loss = 3.4365e-03, PNorm = 168.3172, GNorm = 0.1225, lr_0 = 2.7590e-04
Loss = 3.3896e-03, PNorm = 168.3258, GNorm = 0.2487, lr_0 = 2.7571e-04
Loss = 3.7384e-03, PNorm = 168.3358, GNorm = 0.2510, lr_0 = 2.7552e-04
Loss = 3.3886e-03, PNorm = 168.3468, GNorm = 0.3518, lr_0 = 2.7533e-04
Loss = 5.8504e-03, PNorm = 168.3513, GNorm = 0.1951, lr_0 = 2.7514e-04
Loss = 4.1041e-03, PNorm = 168.3561, GNorm = 0.1968, lr_0 = 2.7495e-04
Loss = 5.7306e-03, PNorm = 168.3652, GNorm = 0.1860, lr_0 = 2.7476e-04
Loss = 4.0991e-03, PNorm = 168.3787, GNorm = 0.0999, lr_0 = 2.7457e-04
Loss = 6.3010e-03, PNorm = 168.3887, GNorm = 0.2032, lr_0 = 2.7439e-04
Loss = 4.2076e-03, PNorm = 168.3963, GNorm = 0.2749, lr_0 = 2.7420e-04
Loss = 4.0118e-03, PNorm = 168.4047, GNorm = 0.1005, lr_0 = 2.7401e-04
Loss = 4.4307e-03, PNorm = 168.4115, GNorm = 0.2667, lr_0 = 2.7382e-04
Loss = 3.6943e-03, PNorm = 168.4211, GNorm = 0.1545, lr_0 = 2.7364e-04
Loss = 4.5421e-03, PNorm = 168.4294, GNorm = 0.0870, lr_0 = 2.7345e-04
Loss = 4.8036e-03, PNorm = 168.4379, GNorm = 0.2369, lr_0 = 2.7326e-04
Loss = 3.9751e-03, PNorm = 168.4510, GNorm = 0.3044, lr_0 = 2.7307e-04
Loss = 4.9918e-03, PNorm = 168.4636, GNorm = 0.1549, lr_0 = 2.7289e-04
Loss = 3.8709e-03, PNorm = 168.4749, GNorm = 0.3868, lr_0 = 2.7270e-04
Loss = 4.7615e-03, PNorm = 168.4856, GNorm = 0.2479, lr_0 = 2.7251e-04
Loss = 3.8125e-03, PNorm = 168.4962, GNorm = 0.1119, lr_0 = 2.7233e-04
Loss = 3.8762e-03, PNorm = 168.5056, GNorm = 0.0641, lr_0 = 2.7214e-04
Loss = 5.1556e-03, PNorm = 168.5160, GNorm = 0.2387, lr_0 = 2.7195e-04
Loss = 4.4572e-03, PNorm = 168.5223, GNorm = 0.1522, lr_0 = 2.7177e-04
Loss = 5.1730e-03, PNorm = 168.5328, GNorm = 0.2183, lr_0 = 2.7158e-04
Loss = 4.0650e-03, PNorm = 168.5448, GNorm = 0.3287, lr_0 = 2.7139e-04
Loss = 4.0474e-03, PNorm = 168.5558, GNorm = 0.2931, lr_0 = 2.7121e-04
Loss = 5.3716e-03, PNorm = 168.5657, GNorm = 0.1175, lr_0 = 2.7102e-04
Loss = 3.0282e-03, PNorm = 168.5745, GNorm = 0.1573, lr_0 = 2.7084e-04
Loss = 4.0235e-03, PNorm = 168.5835, GNorm = 0.1765, lr_0 = 2.7065e-04
Loss = 3.8631e-03, PNorm = 168.5914, GNorm = 0.0909, lr_0 = 2.7047e-04
Loss = 4.8601e-03, PNorm = 168.6014, GNorm = 0.2539, lr_0 = 2.7028e-04
Loss = 4.1828e-03, PNorm = 168.6109, GNorm = 0.4001, lr_0 = 2.7010e-04
Loss = 4.6265e-03, PNorm = 168.6217, GNorm = 0.2874, lr_0 = 2.6991e-04
Loss = 4.1495e-03, PNorm = 168.6308, GNorm = 0.0639, lr_0 = 2.6973e-04
Loss = 4.3356e-03, PNorm = 168.6381, GNorm = 0.2291, lr_0 = 2.6954e-04
Loss = 4.4314e-03, PNorm = 168.6479, GNorm = 0.1657, lr_0 = 2.6936e-04
Loss = 6.2053e-03, PNorm = 168.6559, GNorm = 0.2369, lr_0 = 2.6917e-04
Loss = 6.0955e-03, PNorm = 168.6634, GNorm = 0.4377, lr_0 = 2.6899e-04
Loss = 4.4639e-03, PNorm = 168.6732, GNorm = 0.1689, lr_0 = 2.6880e-04
Loss = 9.1628e-03, PNorm = 168.6846, GNorm = 0.1576, lr_0 = 2.6862e-04
Loss = 3.7865e-03, PNorm = 168.6947, GNorm = 0.2450, lr_0 = 2.6844e-04
Loss = 3.6112e-03, PNorm = 168.7045, GNorm = 0.0865, lr_0 = 2.6825e-04
Validation mae = 0.278903
Epoch 18
Loss = 4.2638e-03, PNorm = 168.7127, GNorm = 0.1155, lr_0 = 2.6807e-04
Loss = 4.0028e-03, PNorm = 168.7177, GNorm = 0.0975, lr_0 = 2.6788e-04
Loss = 5.5426e-03, PNorm = 168.7218, GNorm = 0.1809, lr_0 = 2.6770e-04
Loss = 3.4956e-03, PNorm = 168.7316, GNorm = 0.0819, lr_0 = 2.6752e-04
Loss = 3.5534e-03, PNorm = 168.7406, GNorm = 0.1862, lr_0 = 2.6733e-04
Loss = 3.2802e-03, PNorm = 168.7486, GNorm = 0.0717, lr_0 = 2.6715e-04
Loss = 3.4382e-03, PNorm = 168.7560, GNorm = 0.0813, lr_0 = 2.6697e-04
Loss = 3.8229e-03, PNorm = 168.7643, GNorm = 0.1233, lr_0 = 2.6678e-04
Loss = 3.4577e-03, PNorm = 168.7712, GNorm = 0.1031, lr_0 = 2.6660e-04
Loss = 3.9095e-03, PNorm = 168.7778, GNorm = 0.1575, lr_0 = 2.6642e-04
Loss = 4.0295e-03, PNorm = 168.7833, GNorm = 0.2478, lr_0 = 2.6624e-04
Loss = 3.3180e-03, PNorm = 168.7903, GNorm = 0.0662, lr_0 = 2.6605e-04
Loss = 3.9911e-03, PNorm = 168.7973, GNorm = 0.0826, lr_0 = 2.6587e-04
Loss = 4.0613e-03, PNorm = 168.8022, GNorm = 0.1397, lr_0 = 2.6569e-04
Loss = 4.5051e-03, PNorm = 168.8068, GNorm = 0.2130, lr_0 = 2.6551e-04
Loss = 3.9019e-03, PNorm = 168.8116, GNorm = 0.1373, lr_0 = 2.6533e-04
Loss = 3.7870e-03, PNorm = 168.8182, GNorm = 0.2650, lr_0 = 2.6514e-04
Loss = 3.7136e-03, PNorm = 168.8244, GNorm = 0.0855, lr_0 = 2.6496e-04
Loss = 3.8495e-03, PNorm = 168.8298, GNorm = 0.1619, lr_0 = 2.6478e-04
Loss = 3.5427e-03, PNorm = 168.8397, GNorm = 0.2773, lr_0 = 2.6460e-04
Loss = 4.8927e-03, PNorm = 168.8450, GNorm = 0.1052, lr_0 = 2.6442e-04
Loss = 5.7346e-03, PNorm = 168.8544, GNorm = 0.1074, lr_0 = 2.6424e-04
Loss = 3.7322e-03, PNorm = 168.8636, GNorm = 0.0843, lr_0 = 2.6406e-04
Loss = 3.5357e-03, PNorm = 168.8726, GNorm = 0.2771, lr_0 = 2.6388e-04
Loss = 4.0797e-03, PNorm = 168.8785, GNorm = 0.1599, lr_0 = 2.6369e-04
Loss = 3.1069e-03, PNorm = 168.8871, GNorm = 0.3005, lr_0 = 2.6351e-04
Loss = 3.9483e-03, PNorm = 168.8931, GNorm = 0.0958, lr_0 = 2.6333e-04
Loss = 4.0809e-03, PNorm = 168.9017, GNorm = 0.1670, lr_0 = 2.6315e-04
Loss = 5.1488e-03, PNorm = 168.9099, GNorm = 0.2149, lr_0 = 2.6297e-04
Loss = 4.2443e-03, PNorm = 168.9141, GNorm = 0.4163, lr_0 = 2.6279e-04
Loss = 5.4865e-03, PNorm = 168.9205, GNorm = 0.1698, lr_0 = 2.6261e-04
Loss = 4.4277e-03, PNorm = 168.9271, GNorm = 0.1710, lr_0 = 2.6243e-04
Loss = 5.0628e-03, PNorm = 168.9355, GNorm = 0.1185, lr_0 = 2.6225e-04
Loss = 3.3465e-03, PNorm = 168.9415, GNorm = 0.1495, lr_0 = 2.6207e-04
Loss = 3.4337e-03, PNorm = 168.9471, GNorm = 0.2589, lr_0 = 2.6189e-04
Loss = 3.3538e-03, PNorm = 168.9522, GNorm = 0.1679, lr_0 = 2.6171e-04
Loss = 3.1594e-03, PNorm = 168.9595, GNorm = 0.1213, lr_0 = 2.6153e-04
Loss = 4.1668e-03, PNorm = 168.9664, GNorm = 0.1959, lr_0 = 2.6136e-04
Loss = 5.1892e-03, PNorm = 168.9741, GNorm = 0.2755, lr_0 = 2.6118e-04
Loss = 3.4611e-03, PNorm = 168.9817, GNorm = 0.2917, lr_0 = 2.6100e-04
Loss = 3.1566e-03, PNorm = 168.9892, GNorm = 0.0992, lr_0 = 2.6082e-04
Loss = 3.8437e-03, PNorm = 168.9938, GNorm = 0.1316, lr_0 = 2.6064e-04
Loss = 3.3454e-03, PNorm = 169.0016, GNorm = 0.0948, lr_0 = 2.6046e-04
Loss = 3.7886e-03, PNorm = 169.0073, GNorm = 0.0477, lr_0 = 2.6028e-04
Loss = 3.3653e-03, PNorm = 169.0152, GNorm = 0.0636, lr_0 = 2.6011e-04
Loss = 3.9929e-03, PNorm = 169.0221, GNorm = 0.2158, lr_0 = 2.5993e-04
Loss = 3.4302e-03, PNorm = 169.0305, GNorm = 0.2365, lr_0 = 2.5975e-04
Loss = 4.6454e-03, PNorm = 169.0390, GNorm = 0.1561, lr_0 = 2.5957e-04
Loss = 4.4534e-03, PNorm = 169.0461, GNorm = 0.2406, lr_0 = 2.5939e-04
Loss = 3.4724e-03, PNorm = 169.0539, GNorm = 0.2974, lr_0 = 2.5922e-04
Loss = 3.2266e-03, PNorm = 169.0635, GNorm = 0.1843, lr_0 = 2.5904e-04
Loss = 2.6691e-03, PNorm = 169.0696, GNorm = 0.1124, lr_0 = 2.5886e-04
Loss = 2.9610e-03, PNorm = 169.0757, GNorm = 0.0672, lr_0 = 2.5868e-04
Loss = 3.1069e-03, PNorm = 169.0786, GNorm = 0.0719, lr_0 = 2.5851e-04
Loss = 3.6198e-03, PNorm = 169.0829, GNorm = 0.0924, lr_0 = 2.5833e-04
Loss = 4.1485e-03, PNorm = 169.0892, GNorm = 0.2900, lr_0 = 2.5815e-04
Loss = 3.0322e-03, PNorm = 169.0973, GNorm = 0.1320, lr_0 = 2.5797e-04
Loss = 3.2724e-03, PNorm = 169.1029, GNorm = 0.0813, lr_0 = 2.5780e-04
Loss = 4.6932e-03, PNorm = 169.1096, GNorm = 0.2172, lr_0 = 2.5762e-04
Loss = 3.3747e-03, PNorm = 169.1179, GNorm = 0.1260, lr_0 = 2.5745e-04
Loss = 3.8293e-03, PNorm = 169.1272, GNorm = 0.0637, lr_0 = 2.5727e-04
Loss = 3.7322e-03, PNorm = 169.1373, GNorm = 0.0825, lr_0 = 2.5709e-04
Loss = 3.0766e-03, PNorm = 169.1456, GNorm = 0.2383, lr_0 = 2.5692e-04
Loss = 3.3315e-03, PNorm = 169.1517, GNorm = 0.0614, lr_0 = 2.5674e-04
Loss = 4.4201e-03, PNorm = 169.1582, GNorm = 0.1235, lr_0 = 2.5656e-04
Loss = 3.8313e-03, PNorm = 169.1634, GNorm = 0.2015, lr_0 = 2.5639e-04
Loss = 4.1272e-03, PNorm = 169.1720, GNorm = 0.2274, lr_0 = 2.5621e-04
Loss = 3.0697e-03, PNorm = 169.1802, GNorm = 0.2012, lr_0 = 2.5604e-04
Loss = 3.1731e-03, PNorm = 169.1909, GNorm = 0.0784, lr_0 = 2.5586e-04
Loss = 3.7142e-03, PNorm = 169.2000, GNorm = 0.1077, lr_0 = 2.5569e-04
Loss = 4.8050e-03, PNorm = 169.2053, GNorm = 0.2052, lr_0 = 2.5551e-04
Loss = 5.6208e-03, PNorm = 169.2144, GNorm = 0.0658, lr_0 = 2.5534e-04
Loss = 3.5975e-03, PNorm = 169.2215, GNorm = 0.3179, lr_0 = 2.5516e-04
Loss = 4.3432e-03, PNorm = 169.2287, GNorm = 0.2189, lr_0 = 2.5499e-04
Loss = 3.0804e-03, PNorm = 169.2334, GNorm = 0.0859, lr_0 = 2.5481e-04
Loss = 3.7992e-03, PNorm = 169.2378, GNorm = 0.0644, lr_0 = 2.5464e-04
Loss = 4.0541e-03, PNorm = 169.2431, GNorm = 0.1641, lr_0 = 2.5446e-04
Loss = 2.9192e-03, PNorm = 169.2523, GNorm = 0.1617, lr_0 = 2.5429e-04
Loss = 4.5750e-03, PNorm = 169.2610, GNorm = 0.1995, lr_0 = 2.5411e-04
Loss = 3.7440e-03, PNorm = 169.2703, GNorm = 0.1089, lr_0 = 2.5394e-04
Loss = 3.7658e-03, PNorm = 169.2801, GNorm = 0.1430, lr_0 = 2.5377e-04
Loss = 5.2283e-03, PNorm = 169.2886, GNorm = 0.0778, lr_0 = 2.5359e-04
Loss = 2.9053e-03, PNorm = 169.2961, GNorm = 0.2034, lr_0 = 2.5342e-04
Loss = 3.7276e-03, PNorm = 169.3034, GNorm = 0.3725, lr_0 = 2.5325e-04
Loss = 4.2141e-03, PNorm = 169.3115, GNorm = 0.1011, lr_0 = 2.5307e-04
Loss = 3.7648e-03, PNorm = 169.3195, GNorm = 0.0595, lr_0 = 2.5290e-04
Loss = 3.2911e-03, PNorm = 169.3271, GNorm = 0.1637, lr_0 = 2.5273e-04
Loss = 3.4090e-03, PNorm = 169.3337, GNorm = 0.3170, lr_0 = 2.5255e-04
Loss = 5.8898e-03, PNorm = 169.3441, GNorm = 0.0863, lr_0 = 2.5238e-04
Loss = 4.2493e-03, PNorm = 169.3534, GNorm = 0.3178, lr_0 = 2.5221e-04
Loss = 3.2695e-03, PNorm = 169.3638, GNorm = 0.1549, lr_0 = 2.5203e-04
Loss = 2.9696e-03, PNorm = 169.3757, GNorm = 0.0564, lr_0 = 2.5186e-04
Loss = 5.9493e-03, PNorm = 169.3848, GNorm = 0.0565, lr_0 = 2.5169e-04
Loss = 4.4556e-03, PNorm = 169.3911, GNorm = 0.1885, lr_0 = 2.5152e-04
Loss = 6.2306e-03, PNorm = 169.3979, GNorm = 0.1444, lr_0 = 2.5134e-04
Loss = 3.3924e-03, PNorm = 169.4048, GNorm = 0.1334, lr_0 = 2.5117e-04
Loss = 4.4543e-03, PNorm = 169.4133, GNorm = 0.0788, lr_0 = 2.5100e-04
Loss = 5.9788e-03, PNorm = 169.4223, GNorm = 0.1788, lr_0 = 2.5083e-04
Loss = 3.9152e-03, PNorm = 169.4298, GNorm = 0.2171, lr_0 = 2.5066e-04
Loss = 4.3160e-03, PNorm = 169.4391, GNorm = 0.1796, lr_0 = 2.5048e-04
Loss = 7.2614e-03, PNorm = 169.4496, GNorm = 0.0548, lr_0 = 2.5031e-04
Loss = 4.4827e-03, PNorm = 169.4576, GNorm = 0.1020, lr_0 = 2.5014e-04
Loss = 3.3364e-03, PNorm = 169.4638, GNorm = 0.2592, lr_0 = 2.4997e-04
Loss = 3.6184e-03, PNorm = 169.4725, GNorm = 0.1286, lr_0 = 2.4980e-04
Loss = 5.8926e-03, PNorm = 169.4794, GNorm = 0.1842, lr_0 = 2.4963e-04
Loss = 3.1819e-03, PNorm = 169.4884, GNorm = 0.2628, lr_0 = 2.4946e-04
Loss = 6.1677e-03, PNorm = 169.4920, GNorm = 0.0952, lr_0 = 2.4929e-04
Loss = 4.2500e-03, PNorm = 169.4999, GNorm = 0.2891, lr_0 = 2.4911e-04
Loss = 5.6315e-03, PNorm = 169.5064, GNorm = 0.1160, lr_0 = 2.4894e-04
Loss = 3.4408e-03, PNorm = 169.5178, GNorm = 0.1200, lr_0 = 2.4877e-04
Loss = 4.1105e-03, PNorm = 169.5268, GNorm = 0.2446, lr_0 = 2.4860e-04
Loss = 3.3661e-03, PNorm = 169.5363, GNorm = 0.2093, lr_0 = 2.4843e-04
Loss = 3.5794e-03, PNorm = 169.5458, GNorm = 0.0792, lr_0 = 2.4826e-04
Loss = 7.8350e-03, PNorm = 169.5524, GNorm = 0.1997, lr_0 = 2.4809e-04
Loss = 4.0728e-03, PNorm = 169.5560, GNorm = 0.1406, lr_0 = 2.4792e-04
Loss = 4.9973e-03, PNorm = 169.5649, GNorm = 0.1548, lr_0 = 2.4775e-04
Loss = 7.6008e-03, PNorm = 169.5750, GNorm = 0.2495, lr_0 = 2.4758e-04
Loss = 4.8444e-03, PNorm = 169.5867, GNorm = 0.2925, lr_0 = 2.4741e-04
Loss = 4.3228e-03, PNorm = 169.5947, GNorm = 0.1270, lr_0 = 2.4724e-04
Loss = 4.5491e-03, PNorm = 169.6034, GNorm = 0.3491, lr_0 = 2.4707e-04
Validation mae = 0.278470
Epoch 19
Loss = 3.1751e-03, PNorm = 169.6096, GNorm = 0.1269, lr_0 = 2.4690e-04
Loss = 3.7966e-03, PNorm = 169.6185, GNorm = 0.1260, lr_0 = 2.4674e-04
Loss = 4.4690e-03, PNorm = 169.6254, GNorm = 0.2232, lr_0 = 2.4657e-04
Loss = 3.4700e-03, PNorm = 169.6301, GNorm = 0.2954, lr_0 = 2.4640e-04
Loss = 3.0193e-03, PNorm = 169.6355, GNorm = 0.1337, lr_0 = 2.4623e-04
Loss = 2.8023e-03, PNorm = 169.6411, GNorm = 0.2178, lr_0 = 2.4606e-04
Loss = 3.1117e-03, PNorm = 169.6450, GNorm = 0.0878, lr_0 = 2.4589e-04
Loss = 3.9738e-03, PNorm = 169.6506, GNorm = 0.1742, lr_0 = 2.4572e-04
Loss = 4.4522e-03, PNorm = 169.6572, GNorm = 0.1435, lr_0 = 2.4556e-04
Loss = 3.3270e-03, PNorm = 169.6641, GNorm = 0.1492, lr_0 = 2.4539e-04
Loss = 2.4671e-03, PNorm = 169.6695, GNorm = 0.2187, lr_0 = 2.4522e-04
Loss = 4.3286e-03, PNorm = 169.6749, GNorm = 0.1455, lr_0 = 2.4505e-04
Loss = 4.1162e-03, PNorm = 169.6816, GNorm = 0.2485, lr_0 = 2.4488e-04
Loss = 4.2009e-03, PNorm = 169.6867, GNorm = 0.3981, lr_0 = 2.4472e-04
Loss = 3.0527e-03, PNorm = 169.6925, GNorm = 0.1150, lr_0 = 2.4455e-04
Loss = 2.6041e-03, PNorm = 169.6968, GNorm = 0.1079, lr_0 = 2.4438e-04
Loss = 3.0673e-03, PNorm = 169.7031, GNorm = 0.2815, lr_0 = 2.4421e-04
Loss = 2.9231e-03, PNorm = 169.7111, GNorm = 0.0736, lr_0 = 2.4405e-04
Loss = 2.4919e-03, PNorm = 169.7165, GNorm = 0.0770, lr_0 = 2.4388e-04
Loss = 2.9580e-03, PNorm = 169.7215, GNorm = 0.0475, lr_0 = 2.4371e-04
Loss = 4.2028e-03, PNorm = 169.7271, GNorm = 0.2184, lr_0 = 2.4354e-04
Loss = 3.5494e-03, PNorm = 169.7313, GNorm = 0.0731, lr_0 = 2.4338e-04
Loss = 3.8533e-03, PNorm = 169.7384, GNorm = 0.2181, lr_0 = 2.4321e-04
Loss = 3.5631e-03, PNorm = 169.7431, GNorm = 0.0752, lr_0 = 2.4304e-04
Loss = 4.1875e-03, PNorm = 169.7503, GNorm = 0.1917, lr_0 = 2.4288e-04
Loss = 2.7639e-03, PNorm = 169.7582, GNorm = 0.0667, lr_0 = 2.4271e-04
Loss = 3.2202e-03, PNorm = 169.7644, GNorm = 0.2925, lr_0 = 2.4254e-04
Loss = 3.5762e-03, PNorm = 169.7690, GNorm = 0.1793, lr_0 = 2.4238e-04
Loss = 2.5177e-03, PNorm = 169.7727, GNorm = 0.1117, lr_0 = 2.4221e-04
Loss = 3.2966e-03, PNorm = 169.7756, GNorm = 0.1514, lr_0 = 2.4205e-04
Loss = 2.5816e-03, PNorm = 169.7825, GNorm = 0.0548, lr_0 = 2.4188e-04
Loss = 2.9101e-03, PNorm = 169.7887, GNorm = 0.0831, lr_0 = 2.4171e-04
Loss = 3.0494e-03, PNorm = 169.7950, GNorm = 0.1290, lr_0 = 2.4155e-04
Loss = 2.8985e-03, PNorm = 169.7999, GNorm = 0.0616, lr_0 = 2.4138e-04
Loss = 4.0460e-03, PNorm = 169.8067, GNorm = 0.1116, lr_0 = 2.4122e-04
Loss = 4.1707e-03, PNorm = 169.8095, GNorm = 0.0920, lr_0 = 2.4105e-04
Loss = 2.7649e-03, PNorm = 169.8130, GNorm = 0.1333, lr_0 = 2.4089e-04
Loss = 3.6884e-03, PNorm = 169.8176, GNorm = 0.1715, lr_0 = 2.4072e-04
Loss = 2.6676e-03, PNorm = 169.8234, GNorm = 0.1774, lr_0 = 2.4056e-04
Loss = 5.3469e-03, PNorm = 169.8302, GNorm = 0.0792, lr_0 = 2.4039e-04
Loss = 3.8999e-03, PNorm = 169.8354, GNorm = 0.1082, lr_0 = 2.4023e-04
Loss = 2.8870e-03, PNorm = 169.8394, GNorm = 0.1616, lr_0 = 2.4006e-04
Loss = 2.7001e-03, PNorm = 169.8442, GNorm = 0.0620, lr_0 = 2.3990e-04
Loss = 4.5448e-03, PNorm = 169.8496, GNorm = 0.2411, lr_0 = 2.3974e-04
Loss = 2.6173e-03, PNorm = 169.8537, GNorm = 0.0650, lr_0 = 2.3957e-04
Loss = 4.0487e-03, PNorm = 169.8632, GNorm = 0.2162, lr_0 = 2.3941e-04
Loss = 2.7348e-03, PNorm = 169.8686, GNorm = 0.2668, lr_0 = 2.3924e-04
Loss = 2.9189e-03, PNorm = 169.8760, GNorm = 0.1869, lr_0 = 2.3908e-04
Loss = 3.0309e-03, PNorm = 169.8846, GNorm = 0.1554, lr_0 = 2.3892e-04
Loss = 2.7561e-03, PNorm = 169.8931, GNorm = 0.1785, lr_0 = 2.3875e-04
Loss = 3.7414e-03, PNorm = 169.9010, GNorm = 0.5037, lr_0 = 2.3859e-04
Loss = 4.8944e-03, PNorm = 169.9071, GNorm = 0.1136, lr_0 = 2.3842e-04
Loss = 2.5109e-03, PNorm = 169.9127, GNorm = 0.0823, lr_0 = 2.3826e-04
Loss = 2.5978e-03, PNorm = 169.9195, GNorm = 0.0813, lr_0 = 2.3810e-04
Loss = 3.0072e-03, PNorm = 169.9231, GNorm = 0.1671, lr_0 = 2.3794e-04
Loss = 2.9740e-03, PNorm = 169.9288, GNorm = 0.2645, lr_0 = 2.3777e-04
Loss = 2.8581e-03, PNorm = 169.9363, GNorm = 0.1464, lr_0 = 2.3761e-04
Loss = 3.1960e-03, PNorm = 169.9424, GNorm = 0.1129, lr_0 = 2.3745e-04
Loss = 2.8660e-03, PNorm = 169.9479, GNorm = 0.1322, lr_0 = 2.3728e-04
Loss = 6.3264e-03, PNorm = 169.9529, GNorm = 0.3030, lr_0 = 2.3712e-04
Loss = 3.2635e-03, PNorm = 169.9605, GNorm = 0.2983, lr_0 = 2.3696e-04
Loss = 2.5672e-03, PNorm = 169.9698, GNorm = 0.1276, lr_0 = 2.3680e-04
Loss = 3.7281e-03, PNorm = 169.9784, GNorm = 0.1951, lr_0 = 2.3663e-04
Loss = 4.9553e-03, PNorm = 169.9845, GNorm = 0.2160, lr_0 = 2.3647e-04
Loss = 3.8915e-03, PNorm = 169.9876, GNorm = 0.2205, lr_0 = 2.3631e-04
Loss = 2.7691e-03, PNorm = 169.9925, GNorm = 0.1819, lr_0 = 2.3615e-04
Loss = 2.8091e-03, PNorm = 169.9958, GNorm = 0.0723, lr_0 = 2.3599e-04
Loss = 3.6708e-03, PNorm = 170.0039, GNorm = 0.2019, lr_0 = 2.3582e-04
Loss = 3.6889e-03, PNorm = 170.0125, GNorm = 0.2103, lr_0 = 2.3566e-04
Loss = 4.4132e-03, PNorm = 170.0179, GNorm = 0.5030, lr_0 = 2.3550e-04
Loss = 2.5683e-03, PNorm = 170.0254, GNorm = 0.0587, lr_0 = 2.3534e-04
Loss = 3.3294e-03, PNorm = 170.0327, GNorm = 0.1924, lr_0 = 2.3518e-04
Loss = 3.4433e-03, PNorm = 170.0408, GNorm = 0.0447, lr_0 = 2.3502e-04
Loss = 3.0182e-03, PNorm = 170.0456, GNorm = 0.0877, lr_0 = 2.3486e-04
Loss = 4.5800e-03, PNorm = 170.0504, GNorm = 0.0853, lr_0 = 2.3470e-04
Loss = 7.8544e-03, PNorm = 170.0532, GNorm = 0.3464, lr_0 = 2.3454e-04
Loss = 3.9125e-03, PNorm = 170.0600, GNorm = 0.2232, lr_0 = 2.3437e-04
Loss = 4.4961e-03, PNorm = 170.0680, GNorm = 0.9039, lr_0 = 2.3421e-04
Loss = 3.8554e-03, PNorm = 170.0720, GNorm = 0.2312, lr_0 = 2.3405e-04
Loss = 5.5496e-03, PNorm = 170.0791, GNorm = 0.2208, lr_0 = 2.3389e-04
Loss = 3.0133e-03, PNorm = 170.0852, GNorm = 0.1370, lr_0 = 2.3373e-04
Loss = 4.6421e-03, PNorm = 170.0923, GNorm = 0.0966, lr_0 = 2.3357e-04
Loss = 4.5728e-03, PNorm = 170.0992, GNorm = 0.3565, lr_0 = 2.3341e-04
Loss = 2.7907e-03, PNorm = 170.1031, GNorm = 0.1091, lr_0 = 2.3325e-04
Loss = 4.1168e-03, PNorm = 170.1077, GNorm = 0.1210, lr_0 = 2.3309e-04
Loss = 4.9766e-03, PNorm = 170.1145, GNorm = 0.3708, lr_0 = 2.3293e-04
Loss = 3.2564e-03, PNorm = 170.1248, GNorm = 0.1242, lr_0 = 2.3277e-04
Loss = 4.2736e-03, PNorm = 170.1333, GNorm = 0.2151, lr_0 = 2.3261e-04
Loss = 3.7889e-03, PNorm = 170.1399, GNorm = 0.1095, lr_0 = 2.3246e-04
Loss = 2.9053e-03, PNorm = 170.1465, GNorm = 0.2185, lr_0 = 2.3230e-04
Loss = 4.0405e-03, PNorm = 170.1504, GNorm = 0.1734, lr_0 = 2.3214e-04
Loss = 2.8259e-03, PNorm = 170.1571, GNorm = 0.1028, lr_0 = 2.3198e-04
Loss = 2.7980e-03, PNorm = 170.1640, GNorm = 0.1920, lr_0 = 2.3182e-04
Loss = 4.8329e-03, PNorm = 170.1727, GNorm = 0.0859, lr_0 = 2.3166e-04
Loss = 4.3133e-03, PNorm = 170.1781, GNorm = 0.1809, lr_0 = 2.3150e-04
Loss = 2.5454e-03, PNorm = 170.1858, GNorm = 0.0772, lr_0 = 2.3134e-04
Loss = 3.7598e-03, PNorm = 170.1919, GNorm = 0.1360, lr_0 = 2.3118e-04
Loss = 3.8242e-03, PNorm = 170.1994, GNorm = 0.0915, lr_0 = 2.3103e-04
Loss = 3.4714e-03, PNorm = 170.2017, GNorm = 0.2969, lr_0 = 2.3087e-04
Loss = 4.0631e-03, PNorm = 170.2081, GNorm = 0.2410, lr_0 = 2.3071e-04
Loss = 2.7987e-03, PNorm = 170.2139, GNorm = 0.1850, lr_0 = 2.3055e-04
Loss = 2.8317e-03, PNorm = 170.2230, GNorm = 0.2256, lr_0 = 2.3039e-04
Loss = 3.7364e-03, PNorm = 170.2327, GNorm = 0.1770, lr_0 = 2.3024e-04
Loss = 3.1844e-03, PNorm = 170.2413, GNorm = 0.1903, lr_0 = 2.3008e-04
Loss = 3.6151e-03, PNorm = 170.2484, GNorm = 0.2487, lr_0 = 2.2992e-04
Loss = 3.3145e-03, PNorm = 170.2553, GNorm = 0.1101, lr_0 = 2.2976e-04
Loss = 2.6652e-03, PNorm = 170.2614, GNorm = 0.2119, lr_0 = 2.2961e-04
Loss = 3.1138e-03, PNorm = 170.2669, GNorm = 0.1155, lr_0 = 2.2945e-04
Loss = 2.6305e-03, PNorm = 170.2733, GNorm = 0.2153, lr_0 = 2.2929e-04
Loss = 5.0758e-03, PNorm = 170.2802, GNorm = 0.0815, lr_0 = 2.2913e-04
Loss = 4.4684e-03, PNorm = 170.2851, GNorm = 0.1473, lr_0 = 2.2898e-04
Loss = 2.5618e-03, PNorm = 170.2898, GNorm = 0.2409, lr_0 = 2.2882e-04
Loss = 2.3258e-03, PNorm = 170.2955, GNorm = 0.1642, lr_0 = 2.2866e-04
Loss = 3.6708e-03, PNorm = 170.3019, GNorm = 0.1215, lr_0 = 2.2851e-04
Loss = 2.5301e-03, PNorm = 170.3078, GNorm = 0.2870, lr_0 = 2.2835e-04
Loss = 3.3703e-03, PNorm = 170.3140, GNorm = 0.2032, lr_0 = 2.2819e-04
Loss = 3.9610e-03, PNorm = 170.3187, GNorm = 0.1230, lr_0 = 2.2804e-04
Loss = 3.2972e-03, PNorm = 170.3247, GNorm = 0.1216, lr_0 = 2.2788e-04
Loss = 4.2283e-03, PNorm = 170.3321, GNorm = 0.1219, lr_0 = 2.2773e-04
Loss = 5.3518e-03, PNorm = 170.3400, GNorm = 0.2850, lr_0 = 2.2757e-04
Validation mae = 0.278391
Epoch 20
Loss = 2.6654e-03, PNorm = 170.3447, GNorm = 0.2001, lr_0 = 2.2741e-04
Loss = 2.9427e-03, PNorm = 170.3517, GNorm = 0.1514, lr_0 = 2.2726e-04
Loss = 3.3613e-03, PNorm = 170.3562, GNorm = 0.0564, lr_0 = 2.2710e-04
Loss = 2.6451e-03, PNorm = 170.3603, GNorm = 0.1000, lr_0 = 2.2695e-04
Loss = 2.8633e-03, PNorm = 170.3658, GNorm = 0.3275, lr_0 = 2.2679e-04
Loss = 2.5837e-03, PNorm = 170.3705, GNorm = 0.1834, lr_0 = 2.2664e-04
Loss = 3.0734e-03, PNorm = 170.3756, GNorm = 0.2104, lr_0 = 2.2648e-04
Loss = 3.3905e-03, PNorm = 170.3841, GNorm = 0.0591, lr_0 = 2.2632e-04
Loss = 3.1581e-03, PNorm = 170.3902, GNorm = 0.2047, lr_0 = 2.2617e-04
Loss = 3.0952e-03, PNorm = 170.3958, GNorm = 0.1747, lr_0 = 2.2601e-04
Loss = 2.2154e-03, PNorm = 170.3994, GNorm = 0.2170, lr_0 = 2.2586e-04
Loss = 2.5071e-03, PNorm = 170.4031, GNorm = 0.1625, lr_0 = 2.2571e-04
Loss = 2.6391e-03, PNorm = 170.4088, GNorm = 0.1514, lr_0 = 2.2555e-04
Loss = 2.6492e-03, PNorm = 170.4154, GNorm = 0.0709, lr_0 = 2.2540e-04
Loss = 3.4681e-03, PNorm = 170.4217, GNorm = 0.2208, lr_0 = 2.2524e-04
Loss = 3.5052e-03, PNorm = 170.4267, GNorm = 0.1944, lr_0 = 2.2509e-04
Loss = 2.2161e-03, PNorm = 170.4325, GNorm = 0.1547, lr_0 = 2.2493e-04
Loss = 3.2076e-03, PNorm = 170.4407, GNorm = 0.1185, lr_0 = 2.2478e-04
Loss = 2.5290e-03, PNorm = 170.4450, GNorm = 0.1349, lr_0 = 2.2463e-04
Loss = 2.7495e-03, PNorm = 170.4487, GNorm = 0.1880, lr_0 = 2.2447e-04
Loss = 3.1217e-03, PNorm = 170.4528, GNorm = 0.1005, lr_0 = 2.2432e-04
Loss = 2.2832e-03, PNorm = 170.4570, GNorm = 0.0793, lr_0 = 2.2416e-04
Loss = 2.4238e-03, PNorm = 170.4603, GNorm = 0.2021, lr_0 = 2.2401e-04
Loss = 2.8546e-03, PNorm = 170.4652, GNorm = 0.1371, lr_0 = 2.2386e-04
Loss = 2.5699e-03, PNorm = 170.4695, GNorm = 0.2213, lr_0 = 2.2370e-04
Loss = 3.4056e-03, PNorm = 170.4741, GNorm = 0.1953, lr_0 = 2.2355e-04
Loss = 2.8078e-03, PNorm = 170.4771, GNorm = 0.1961, lr_0 = 2.2340e-04
Loss = 2.7021e-03, PNorm = 170.4807, GNorm = 0.1064, lr_0 = 2.2324e-04
Loss = 2.1947e-03, PNorm = 170.4859, GNorm = 0.1945, lr_0 = 2.2309e-04
Loss = 2.7726e-03, PNorm = 170.4929, GNorm = 0.1688, lr_0 = 2.2294e-04
Loss = 3.8811e-03, PNorm = 170.4970, GNorm = 0.1330, lr_0 = 2.2279e-04
Loss = 2.5855e-03, PNorm = 170.5004, GNorm = 0.3264, lr_0 = 2.2263e-04
Loss = 3.5621e-03, PNorm = 170.5063, GNorm = 0.2105, lr_0 = 2.2248e-04
Loss = 2.7216e-03, PNorm = 170.5104, GNorm = 0.2558, lr_0 = 2.2233e-04
Loss = 3.1593e-03, PNorm = 170.5150, GNorm = 0.2347, lr_0 = 2.2218e-04
Loss = 3.1581e-03, PNorm = 170.5200, GNorm = 0.1981, lr_0 = 2.2202e-04
Loss = 3.5847e-03, PNorm = 170.5225, GNorm = 0.4074, lr_0 = 2.2187e-04
Loss = 4.0496e-03, PNorm = 170.5279, GNorm = 0.1783, lr_0 = 2.2172e-04
Loss = 2.9370e-03, PNorm = 170.5338, GNorm = 0.1562, lr_0 = 2.2157e-04
Loss = 2.6659e-03, PNorm = 170.5413, GNorm = 0.1328, lr_0 = 2.2142e-04
Loss = 2.4839e-03, PNorm = 170.5483, GNorm = 0.1608, lr_0 = 2.2126e-04
Loss = 2.6433e-03, PNorm = 170.5543, GNorm = 0.1146, lr_0 = 2.2111e-04
Loss = 2.6949e-03, PNorm = 170.5588, GNorm = 0.1128, lr_0 = 2.2096e-04
Loss = 2.5691e-03, PNorm = 170.5636, GNorm = 0.0931, lr_0 = 2.2081e-04
Loss = 2.2674e-03, PNorm = 170.5690, GNorm = 0.2291, lr_0 = 2.2066e-04
Loss = 2.2809e-03, PNorm = 170.5739, GNorm = 0.1508, lr_0 = 2.2051e-04
Loss = 3.3989e-03, PNorm = 170.5784, GNorm = 0.1082, lr_0 = 2.2036e-04
Loss = 4.5851e-03, PNorm = 170.5821, GNorm = 0.1361, lr_0 = 2.2021e-04
Loss = 3.7628e-03, PNorm = 170.5864, GNorm = 0.1290, lr_0 = 2.2005e-04
Loss = 4.0089e-03, PNorm = 170.5922, GNorm = 0.1282, lr_0 = 2.1990e-04
Loss = 2.2828e-03, PNorm = 170.5982, GNorm = 0.1907, lr_0 = 2.1975e-04
Loss = 2.8046e-03, PNorm = 170.6039, GNorm = 0.0936, lr_0 = 2.1960e-04
Loss = 2.7610e-03, PNorm = 170.6126, GNorm = 0.1679, lr_0 = 2.1945e-04
Loss = 2.8125e-03, PNorm = 170.6177, GNorm = 0.2121, lr_0 = 2.1930e-04
Loss = 4.0293e-03, PNorm = 170.6211, GNorm = 0.1461, lr_0 = 2.1915e-04
Loss = 2.3893e-03, PNorm = 170.6252, GNorm = 0.0930, lr_0 = 2.1900e-04
Loss = 3.7088e-03, PNorm = 170.6306, GNorm = 0.3158, lr_0 = 2.1885e-04
Loss = 2.6518e-03, PNorm = 170.6349, GNorm = 0.1010, lr_0 = 2.1870e-04
Loss = 2.9921e-03, PNorm = 170.6394, GNorm = 0.1605, lr_0 = 2.1855e-04
Loss = 3.0720e-03, PNorm = 170.6442, GNorm = 0.0641, lr_0 = 2.1840e-04
Loss = 2.8180e-03, PNorm = 170.6501, GNorm = 0.3310, lr_0 = 2.1825e-04
Loss = 3.0196e-03, PNorm = 170.6546, GNorm = 0.1796, lr_0 = 2.1810e-04
Loss = 3.7576e-03, PNorm = 170.6577, GNorm = 0.1823, lr_0 = 2.1795e-04
Loss = 2.7015e-03, PNorm = 170.6598, GNorm = 0.1416, lr_0 = 2.1780e-04
Loss = 3.5095e-03, PNorm = 170.6627, GNorm = 0.1308, lr_0 = 2.1765e-04
Loss = 2.8340e-03, PNorm = 170.6688, GNorm = 0.0585, lr_0 = 2.1751e-04
Loss = 3.9214e-03, PNorm = 170.6750, GNorm = 0.3122, lr_0 = 2.1736e-04
Loss = 2.9445e-03, PNorm = 170.6818, GNorm = 0.1006, lr_0 = 2.1721e-04
Loss = 2.4629e-03, PNorm = 170.6881, GNorm = 0.1660, lr_0 = 2.1706e-04
Loss = 2.9191e-03, PNorm = 170.6961, GNorm = 0.3008, lr_0 = 2.1691e-04
Loss = 2.8229e-03, PNorm = 170.7016, GNorm = 0.1660, lr_0 = 2.1676e-04
Loss = 2.2751e-03, PNorm = 170.7059, GNorm = 0.2311, lr_0 = 2.1661e-04
Loss = 2.7762e-03, PNorm = 170.7087, GNorm = 0.1577, lr_0 = 2.1646e-04
Loss = 2.9894e-03, PNorm = 170.7127, GNorm = 0.1486, lr_0 = 2.1632e-04
Loss = 2.5980e-03, PNorm = 170.7189, GNorm = 0.4587, lr_0 = 2.1617e-04
Loss = 3.5708e-03, PNorm = 170.7229, GNorm = 0.1626, lr_0 = 2.1602e-04
Loss = 2.5337e-03, PNorm = 170.7278, GNorm = 0.2737, lr_0 = 2.1587e-04
Loss = 2.8116e-03, PNorm = 170.7345, GNorm = 0.1636, lr_0 = 2.1572e-04
Loss = 3.0410e-03, PNorm = 170.7412, GNorm = 0.1182, lr_0 = 2.1558e-04
Loss = 6.6343e-03, PNorm = 170.7454, GNorm = 0.4103, lr_0 = 2.1543e-04
Loss = 2.8994e-03, PNorm = 170.7517, GNorm = 0.1220, lr_0 = 2.1528e-04
Loss = 4.0710e-03, PNorm = 170.7562, GNorm = 0.2096, lr_0 = 2.1513e-04
Loss = 2.9181e-03, PNorm = 170.7612, GNorm = 0.2152, lr_0 = 2.1499e-04
Loss = 3.3198e-03, PNorm = 170.7636, GNorm = 0.2845, lr_0 = 2.1484e-04
Loss = 3.0969e-03, PNorm = 170.7694, GNorm = 0.0688, lr_0 = 2.1469e-04
Loss = 2.9088e-03, PNorm = 170.7769, GNorm = 0.0800, lr_0 = 2.1454e-04
Loss = 2.9065e-03, PNorm = 170.7824, GNorm = 0.1228, lr_0 = 2.1440e-04
Loss = 2.2602e-03, PNorm = 170.7874, GNorm = 0.1007, lr_0 = 2.1425e-04
Loss = 6.2091e-03, PNorm = 170.7940, GNorm = 0.2808, lr_0 = 2.1410e-04
Loss = 5.4047e-03, PNorm = 170.8036, GNorm = 0.2265, lr_0 = 2.1396e-04
Loss = 2.2199e-03, PNorm = 170.8100, GNorm = 0.0536, lr_0 = 2.1381e-04
Loss = 4.2318e-03, PNorm = 170.8156, GNorm = 0.3776, lr_0 = 2.1366e-04
Loss = 3.4917e-03, PNorm = 170.8197, GNorm = 0.2236, lr_0 = 2.1352e-04
Loss = 2.2423e-03, PNorm = 170.8224, GNorm = 0.1433, lr_0 = 2.1337e-04
Loss = 2.5501e-03, PNorm = 170.8271, GNorm = 0.0531, lr_0 = 2.1323e-04
Loss = 2.4753e-03, PNorm = 170.8313, GNorm = 0.1229, lr_0 = 2.1308e-04
Loss = 3.0659e-03, PNorm = 170.8367, GNorm = 0.2384, lr_0 = 2.1293e-04
Loss = 2.0037e-03, PNorm = 170.8412, GNorm = 0.1865, lr_0 = 2.1279e-04
Loss = 3.0570e-03, PNorm = 170.8475, GNorm = 0.3253, lr_0 = 2.1264e-04
Loss = 5.9395e-03, PNorm = 170.8498, GNorm = 0.2746, lr_0 = 2.1250e-04
Loss = 2.3902e-03, PNorm = 170.8538, GNorm = 0.2552, lr_0 = 2.1235e-04
Loss = 2.7060e-03, PNorm = 170.8596, GNorm = 0.0755, lr_0 = 2.1221e-04
Loss = 2.4017e-03, PNorm = 170.8673, GNorm = 0.1985, lr_0 = 2.1206e-04
Loss = 2.4222e-03, PNorm = 170.8744, GNorm = 0.1606, lr_0 = 2.1191e-04
Loss = 3.2456e-03, PNorm = 170.8811, GNorm = 0.1365, lr_0 = 2.1177e-04
Loss = 3.1904e-03, PNorm = 170.8837, GNorm = 0.0600, lr_0 = 2.1162e-04
Loss = 2.8304e-03, PNorm = 170.8886, GNorm = 0.1533, lr_0 = 2.1148e-04
Loss = 3.1425e-03, PNorm = 170.8962, GNorm = 0.1856, lr_0 = 2.1133e-04
Loss = 2.0831e-03, PNorm = 170.9018, GNorm = 0.1811, lr_0 = 2.1119e-04
Loss = 2.5191e-03, PNorm = 170.9051, GNorm = 0.1173, lr_0 = 2.1104e-04
Loss = 2.4926e-03, PNorm = 170.9083, GNorm = 0.0902, lr_0 = 2.1090e-04
Loss = 2.4916e-03, PNorm = 170.9106, GNorm = 0.2395, lr_0 = 2.1076e-04
Loss = 2.5561e-03, PNorm = 170.9145, GNorm = 0.1967, lr_0 = 2.1061e-04
Loss = 2.9672e-03, PNorm = 170.9219, GNorm = 0.0768, lr_0 = 2.1047e-04
Loss = 3.2958e-03, PNorm = 170.9284, GNorm = 0.0748, lr_0 = 2.1032e-04
Loss = 4.9858e-03, PNorm = 170.9351, GNorm = 0.1341, lr_0 = 2.1018e-04
Loss = 3.2061e-03, PNorm = 170.9416, GNorm = 0.0908, lr_0 = 2.1003e-04
Loss = 4.5829e-03, PNorm = 170.9491, GNorm = 0.2326, lr_0 = 2.0989e-04
Loss = 3.3205e-03, PNorm = 170.9564, GNorm = 0.0791, lr_0 = 2.0975e-04
Loss = 3.7940e-03, PNorm = 170.9620, GNorm = 0.2765, lr_0 = 2.0960e-04
Validation mae = 0.278183
Epoch 21
Loss = 2.4042e-03, PNorm = 170.9660, GNorm = 0.1842, lr_0 = 2.0946e-04
Loss = 2.1822e-03, PNorm = 170.9718, GNorm = 0.0465, lr_0 = 2.0932e-04
Loss = 3.2462e-03, PNorm = 170.9758, GNorm = 0.0555, lr_0 = 2.0917e-04
Loss = 2.2910e-03, PNorm = 170.9803, GNorm = 0.1236, lr_0 = 2.0903e-04
Loss = 2.4566e-03, PNorm = 170.9830, GNorm = 0.0993, lr_0 = 2.0889e-04
Loss = 2.6159e-03, PNorm = 170.9859, GNorm = 0.2288, lr_0 = 2.0874e-04
Loss = 2.4445e-03, PNorm = 170.9895, GNorm = 0.1325, lr_0 = 2.0860e-04
Loss = 2.5309e-03, PNorm = 170.9939, GNorm = 0.2741, lr_0 = 2.0846e-04
Loss = 2.3738e-03, PNorm = 170.9977, GNorm = 0.0347, lr_0 = 2.0831e-04
Loss = 2.7465e-03, PNorm = 171.0008, GNorm = 0.0728, lr_0 = 2.0817e-04
Loss = 2.2394e-03, PNorm = 171.0061, GNorm = 0.1097, lr_0 = 2.0803e-04
Loss = 2.6090e-03, PNorm = 171.0131, GNorm = 0.1667, lr_0 = 2.0789e-04
Loss = 2.4786e-03, PNorm = 171.0161, GNorm = 0.0462, lr_0 = 2.0774e-04
Loss = 3.6098e-03, PNorm = 171.0174, GNorm = 0.1195, lr_0 = 2.0760e-04
Loss = 4.0637e-03, PNorm = 171.0209, GNorm = 0.1146, lr_0 = 2.0746e-04
Loss = 2.5640e-03, PNorm = 171.0230, GNorm = 0.0609, lr_0 = 2.0732e-04
Loss = 2.5538e-03, PNorm = 171.0242, GNorm = 0.1949, lr_0 = 2.0718e-04
Loss = 3.4300e-03, PNorm = 171.0266, GNorm = 0.1388, lr_0 = 2.0703e-04
Loss = 1.7712e-03, PNorm = 171.0317, GNorm = 0.0875, lr_0 = 2.0689e-04
Loss = 3.1145e-03, PNorm = 171.0378, GNorm = 0.1491, lr_0 = 2.0675e-04
Loss = 1.8508e-03, PNorm = 171.0406, GNorm = 0.1286, lr_0 = 2.0661e-04
Loss = 3.6746e-03, PNorm = 171.0451, GNorm = 0.1846, lr_0 = 2.0647e-04
Loss = 2.8661e-03, PNorm = 171.0481, GNorm = 0.0885, lr_0 = 2.0633e-04
Loss = 2.3355e-03, PNorm = 171.0530, GNorm = 0.1037, lr_0 = 2.0618e-04
Loss = 2.1476e-03, PNorm = 171.0568, GNorm = 0.1863, lr_0 = 2.0604e-04
Loss = 2.2697e-03, PNorm = 171.0611, GNorm = 0.1953, lr_0 = 2.0590e-04
Loss = 2.5466e-03, PNorm = 171.0640, GNorm = 0.0850, lr_0 = 2.0576e-04
Loss = 1.9886e-03, PNorm = 171.0663, GNorm = 0.0953, lr_0 = 2.0562e-04
Loss = 2.6765e-03, PNorm = 171.0691, GNorm = 0.1093, lr_0 = 2.0548e-04
Loss = 2.8689e-03, PNorm = 171.0716, GNorm = 0.1220, lr_0 = 2.0534e-04
Loss = 2.6582e-03, PNorm = 171.0733, GNorm = 0.0805, lr_0 = 2.0520e-04
Loss = 3.0347e-03, PNorm = 171.0758, GNorm = 0.0819, lr_0 = 2.0506e-04
Loss = 2.9392e-03, PNorm = 171.0807, GNorm = 0.1202, lr_0 = 2.0492e-04
Loss = 2.6439e-03, PNorm = 171.0876, GNorm = 0.1907, lr_0 = 2.0478e-04
Loss = 1.6761e-03, PNorm = 171.0934, GNorm = 0.1327, lr_0 = 2.0464e-04
Loss = 2.0679e-03, PNorm = 171.0969, GNorm = 0.0715, lr_0 = 2.0450e-04
Loss = 2.3251e-03, PNorm = 171.1025, GNorm = 0.1557, lr_0 = 2.0436e-04
Loss = 2.2294e-03, PNorm = 171.1080, GNorm = 0.0657, lr_0 = 2.0422e-04
Loss = 2.1937e-03, PNorm = 171.1134, GNorm = 0.0919, lr_0 = 2.0408e-04
Loss = 2.3855e-03, PNorm = 171.1157, GNorm = 0.0711, lr_0 = 2.0394e-04
Loss = 2.9078e-03, PNorm = 171.1169, GNorm = 0.1946, lr_0 = 2.0380e-04
Loss = 1.9577e-03, PNorm = 171.1182, GNorm = 0.1454, lr_0 = 2.0366e-04
Loss = 1.6681e-03, PNorm = 171.1223, GNorm = 0.1691, lr_0 = 2.0352e-04
Loss = 2.8324e-03, PNorm = 171.1261, GNorm = 0.1241, lr_0 = 2.0338e-04
Loss = 1.7767e-03, PNorm = 171.1290, GNorm = 0.1709, lr_0 = 2.0324e-04
Loss = 2.7329e-03, PNorm = 171.1327, GNorm = 0.2071, lr_0 = 2.0310e-04
Loss = 2.6977e-03, PNorm = 171.1380, GNorm = 0.1006, lr_0 = 2.0296e-04
Loss = 3.3597e-03, PNorm = 171.1442, GNorm = 0.2087, lr_0 = 2.0282e-04
Loss = 2.4538e-03, PNorm = 171.1476, GNorm = 0.2373, lr_0 = 2.0268e-04
Loss = 2.4701e-03, PNorm = 171.1510, GNorm = 0.0565, lr_0 = 2.0254e-04
Loss = 2.0120e-03, PNorm = 171.1525, GNorm = 0.0940, lr_0 = 2.0240e-04
Loss = 2.6162e-03, PNorm = 171.1572, GNorm = 0.0988, lr_0 = 2.0227e-04
Loss = 2.6710e-03, PNorm = 171.1620, GNorm = 0.2209, lr_0 = 2.0213e-04
Loss = 2.2212e-03, PNorm = 171.1686, GNorm = 0.1370, lr_0 = 2.0199e-04
Loss = 3.3131e-03, PNorm = 171.1724, GNorm = 0.6436, lr_0 = 2.0185e-04
Loss = 2.0070e-03, PNorm = 171.1754, GNorm = 0.3374, lr_0 = 2.0171e-04
Loss = 3.6138e-03, PNorm = 171.1818, GNorm = 0.1351, lr_0 = 2.0157e-04
Loss = 2.7774e-03, PNorm = 171.1865, GNorm = 0.2860, lr_0 = 2.0144e-04
Loss = 2.7341e-03, PNorm = 171.1902, GNorm = 0.3007, lr_0 = 2.0130e-04
Loss = 2.7213e-03, PNorm = 171.1948, GNorm = 0.2103, lr_0 = 2.0116e-04
Loss = 2.5494e-03, PNorm = 171.1988, GNorm = 0.1664, lr_0 = 2.0102e-04
Loss = 2.7995e-03, PNorm = 171.2034, GNorm = 0.0992, lr_0 = 2.0088e-04
Loss = 2.1471e-03, PNorm = 171.2076, GNorm = 0.1319, lr_0 = 2.0075e-04
Loss = 1.9230e-03, PNorm = 171.2113, GNorm = 0.0902, lr_0 = 2.0061e-04
Loss = 2.1321e-03, PNorm = 171.2170, GNorm = 0.1176, lr_0 = 2.0047e-04
Loss = 2.3556e-03, PNorm = 171.2221, GNorm = 0.2676, lr_0 = 2.0033e-04
Loss = 4.5109e-03, PNorm = 171.2263, GNorm = 0.0905, lr_0 = 2.0020e-04
Loss = 4.8241e-03, PNorm = 171.2304, GNorm = 0.3327, lr_0 = 2.0006e-04
Loss = 2.0753e-03, PNorm = 171.2329, GNorm = 0.0687, lr_0 = 1.9992e-04
Loss = 3.2271e-03, PNorm = 171.2363, GNorm = 0.2045, lr_0 = 1.9979e-04
Loss = 2.1480e-03, PNorm = 171.2427, GNorm = 0.0859, lr_0 = 1.9965e-04
Loss = 2.5866e-03, PNorm = 171.2486, GNorm = 0.0473, lr_0 = 1.9951e-04
Loss = 2.8349e-03, PNorm = 171.2507, GNorm = 0.2158, lr_0 = 1.9938e-04
Loss = 1.7921e-03, PNorm = 171.2544, GNorm = 0.0601, lr_0 = 1.9924e-04
Loss = 1.8049e-03, PNorm = 171.2595, GNorm = 0.0358, lr_0 = 1.9910e-04
Loss = 4.2155e-03, PNorm = 171.2653, GNorm = 0.2946, lr_0 = 1.9897e-04
Loss = 2.8702e-03, PNorm = 171.2703, GNorm = 0.1486, lr_0 = 1.9883e-04
Loss = 2.0296e-03, PNorm = 171.2742, GNorm = 0.0749, lr_0 = 1.9869e-04
Loss = 3.7240e-03, PNorm = 171.2780, GNorm = 0.2921, lr_0 = 1.9856e-04
Loss = 2.2029e-03, PNorm = 171.2818, GNorm = 0.2473, lr_0 = 1.9842e-04
Loss = 1.8007e-03, PNorm = 171.2856, GNorm = 0.0856, lr_0 = 1.9829e-04
Loss = 3.7343e-03, PNorm = 171.2892, GNorm = 0.2437, lr_0 = 1.9815e-04
Loss = 2.5401e-03, PNorm = 171.2928, GNorm = 0.2060, lr_0 = 1.9801e-04
Loss = 3.7806e-03, PNorm = 171.2965, GNorm = 0.0526, lr_0 = 1.9788e-04
Loss = 3.2895e-03, PNorm = 171.3026, GNorm = 0.1111, lr_0 = 1.9774e-04
Loss = 2.9712e-03, PNorm = 171.3095, GNorm = 0.0619, lr_0 = 1.9761e-04
Loss = 1.5662e-03, PNorm = 171.3134, GNorm = 0.0477, lr_0 = 1.9747e-04
Loss = 5.0179e-03, PNorm = 171.3186, GNorm = 0.0582, lr_0 = 1.9734e-04
Loss = 3.0001e-03, PNorm = 171.3227, GNorm = 0.5754, lr_0 = 1.9720e-04
Loss = 2.5885e-03, PNorm = 171.3279, GNorm = 0.1228, lr_0 = 1.9707e-04
Loss = 2.6064e-03, PNorm = 171.3313, GNorm = 0.2245, lr_0 = 1.9693e-04
Loss = 3.2561e-03, PNorm = 171.3366, GNorm = 0.1178, lr_0 = 1.9680e-04
Loss = 2.3945e-03, PNorm = 171.3405, GNorm = 0.0667, lr_0 = 1.9666e-04
Loss = 2.3962e-03, PNorm = 171.3436, GNorm = 0.0633, lr_0 = 1.9653e-04
Loss = 4.3381e-03, PNorm = 171.3480, GNorm = 0.0750, lr_0 = 1.9639e-04
Loss = 1.9314e-03, PNorm = 171.3536, GNorm = 0.0679, lr_0 = 1.9626e-04
Loss = 2.1506e-03, PNorm = 171.3601, GNorm = 0.0998, lr_0 = 1.9612e-04
Loss = 5.6059e-03, PNorm = 171.3655, GNorm = 0.2430, lr_0 = 1.9599e-04
Loss = 2.3244e-03, PNorm = 171.3681, GNorm = 0.0916, lr_0 = 1.9585e-04
Loss = 3.2628e-03, PNorm = 171.3729, GNorm = 0.1585, lr_0 = 1.9572e-04
Loss = 3.5391e-03, PNorm = 171.3778, GNorm = 0.1702, lr_0 = 1.9559e-04
Loss = 1.9735e-03, PNorm = 171.3815, GNorm = 0.2984, lr_0 = 1.9545e-04
Loss = 2.5641e-03, PNorm = 171.3846, GNorm = 0.0472, lr_0 = 1.9532e-04
Loss = 2.5930e-03, PNorm = 171.3899, GNorm = 0.0440, lr_0 = 1.9518e-04
Loss = 2.5411e-03, PNorm = 171.3964, GNorm = 0.2127, lr_0 = 1.9505e-04
Loss = 2.6356e-03, PNorm = 171.4027, GNorm = 0.0477, lr_0 = 1.9492e-04
Loss = 2.4026e-03, PNorm = 171.4085, GNorm = 0.0754, lr_0 = 1.9478e-04
Loss = 2.5328e-03, PNorm = 171.4121, GNorm = 0.0609, lr_0 = 1.9465e-04
Loss = 1.8469e-03, PNorm = 171.4157, GNorm = 0.0552, lr_0 = 1.9452e-04
Loss = 3.5597e-03, PNorm = 171.4190, GNorm = 0.2818, lr_0 = 1.9438e-04
Loss = 3.3838e-03, PNorm = 171.4232, GNorm = 0.0386, lr_0 = 1.9425e-04
Loss = 3.7838e-03, PNorm = 171.4266, GNorm = 0.1938, lr_0 = 1.9412e-04
Loss = 2.3769e-03, PNorm = 171.4291, GNorm = 0.1014, lr_0 = 1.9398e-04
Loss = 1.7999e-03, PNorm = 171.4338, GNorm = 0.0677, lr_0 = 1.9385e-04
Loss = 3.1306e-03, PNorm = 171.4389, GNorm = 0.1559, lr_0 = 1.9372e-04
Loss = 3.1287e-03, PNorm = 171.4460, GNorm = 0.3037, lr_0 = 1.9359e-04
Loss = 1.9532e-03, PNorm = 171.4528, GNorm = 0.0830, lr_0 = 1.9345e-04
Loss = 1.9676e-03, PNorm = 171.4593, GNorm = 0.2089, lr_0 = 1.9332e-04
Loss = 2.7067e-03, PNorm = 171.4653, GNorm = 0.1762, lr_0 = 1.9319e-04
Loss = 3.8611e-03, PNorm = 171.4688, GNorm = 0.0302, lr_0 = 1.9306e-04
Validation mae = 0.278278
Epoch 22
Loss = 2.2826e-03, PNorm = 171.4704, GNorm = 0.1290, lr_0 = 1.9292e-04
Loss = 2.1646e-03, PNorm = 171.4726, GNorm = 0.1319, lr_0 = 1.9279e-04
Loss = 2.3030e-03, PNorm = 171.4755, GNorm = 0.1312, lr_0 = 1.9266e-04
Loss = 1.8082e-03, PNorm = 171.4785, GNorm = 0.1451, lr_0 = 1.9253e-04
Loss = 1.8930e-03, PNorm = 171.4818, GNorm = 0.2091, lr_0 = 1.9240e-04
Loss = 1.6082e-03, PNorm = 171.4850, GNorm = 0.3310, lr_0 = 1.9226e-04
Loss = 1.7743e-03, PNorm = 171.4893, GNorm = 0.0392, lr_0 = 1.9213e-04
Loss = 2.7398e-03, PNorm = 171.4928, GNorm = 0.1530, lr_0 = 1.9200e-04
Loss = 1.8767e-03, PNorm = 171.4961, GNorm = 0.0450, lr_0 = 1.9187e-04
Loss = 1.7257e-03, PNorm = 171.4986, GNorm = 0.0489, lr_0 = 1.9174e-04
Loss = 1.5662e-03, PNorm = 171.5024, GNorm = 0.1722, lr_0 = 1.9161e-04
Loss = 3.2319e-03, PNorm = 171.5042, GNorm = 0.1059, lr_0 = 1.9148e-04
Loss = 2.1625e-03, PNorm = 171.5071, GNorm = 0.1493, lr_0 = 1.9134e-04
Loss = 2.4433e-03, PNorm = 171.5073, GNorm = 0.0962, lr_0 = 1.9121e-04
Loss = 1.7890e-03, PNorm = 171.5094, GNorm = 0.1576, lr_0 = 1.9108e-04
Loss = 1.7706e-03, PNorm = 171.5118, GNorm = 0.2481, lr_0 = 1.9095e-04
Loss = 3.7257e-03, PNorm = 171.5157, GNorm = 0.2737, lr_0 = 1.9082e-04
Loss = 2.3554e-03, PNorm = 171.5194, GNorm = 0.2140, lr_0 = 1.9069e-04
Loss = 1.7898e-03, PNorm = 171.5238, GNorm = 0.1872, lr_0 = 1.9056e-04
Loss = 3.2068e-03, PNorm = 171.5286, GNorm = 0.2937, lr_0 = 1.9043e-04
Loss = 3.6391e-03, PNorm = 171.5326, GNorm = 0.3662, lr_0 = 1.9030e-04
Loss = 1.4624e-03, PNorm = 171.5367, GNorm = 0.0425, lr_0 = 1.9017e-04
Loss = 2.4797e-03, PNorm = 171.5403, GNorm = 0.2612, lr_0 = 1.9004e-04
Loss = 2.1718e-03, PNorm = 171.5435, GNorm = 0.1071, lr_0 = 1.8991e-04
Loss = 3.3370e-03, PNorm = 171.5473, GNorm = 0.0737, lr_0 = 1.8978e-04
Loss = 3.4028e-03, PNorm = 171.5499, GNorm = 0.1057, lr_0 = 1.8965e-04
Loss = 3.0036e-03, PNorm = 171.5530, GNorm = 0.2551, lr_0 = 1.8952e-04
Loss = 2.0534e-03, PNorm = 171.5571, GNorm = 0.0521, lr_0 = 1.8939e-04
Loss = 1.8024e-03, PNorm = 171.5615, GNorm = 0.1008, lr_0 = 1.8926e-04
Loss = 2.3350e-03, PNorm = 171.5660, GNorm = 0.1104, lr_0 = 1.8913e-04
Loss = 2.1573e-03, PNorm = 171.5708, GNorm = 0.1202, lr_0 = 1.8900e-04
Loss = 1.5153e-03, PNorm = 171.5762, GNorm = 0.1109, lr_0 = 1.8887e-04
Loss = 1.6738e-03, PNorm = 171.5800, GNorm = 0.1163, lr_0 = 1.8874e-04
Loss = 1.7395e-03, PNorm = 171.5816, GNorm = 0.1605, lr_0 = 1.8861e-04
Loss = 2.7314e-03, PNorm = 171.5861, GNorm = 0.1706, lr_0 = 1.8848e-04
Loss = 2.9296e-03, PNorm = 171.5880, GNorm = 0.0375, lr_0 = 1.8835e-04
Loss = 3.6367e-03, PNorm = 171.5890, GNorm = 0.2045, lr_0 = 1.8822e-04
Loss = 1.9845e-03, PNorm = 171.5941, GNorm = 0.0987, lr_0 = 1.8809e-04
Loss = 3.6203e-03, PNorm = 171.5983, GNorm = 0.1456, lr_0 = 1.8797e-04
Loss = 1.9145e-03, PNorm = 171.6034, GNorm = 0.0680, lr_0 = 1.8784e-04
Loss = 2.3943e-03, PNorm = 171.6090, GNorm = 0.1285, lr_0 = 1.8771e-04
Loss = 1.6935e-03, PNorm = 171.6148, GNorm = 0.0767, lr_0 = 1.8758e-04
Loss = 1.5401e-03, PNorm = 171.6189, GNorm = 0.0626, lr_0 = 1.8745e-04
Loss = 1.9170e-03, PNorm = 171.6218, GNorm = 0.1970, lr_0 = 1.8732e-04
Loss = 2.1102e-03, PNorm = 171.6268, GNorm = 0.0987, lr_0 = 1.8719e-04
Loss = 1.6726e-03, PNorm = 171.6326, GNorm = 0.0272, lr_0 = 1.8707e-04
Loss = 2.5634e-03, PNorm = 171.6372, GNorm = 0.0736, lr_0 = 1.8694e-04
Loss = 1.5407e-03, PNorm = 171.6408, GNorm = 0.1375, lr_0 = 1.8681e-04
Loss = 3.5239e-03, PNorm = 171.6436, GNorm = 0.2928, lr_0 = 1.8668e-04
Loss = 3.3281e-03, PNorm = 171.6462, GNorm = 0.1414, lr_0 = 1.8655e-04
Loss = 1.6362e-03, PNorm = 171.6493, GNorm = 0.2204, lr_0 = 1.8643e-04
Loss = 5.0518e-03, PNorm = 171.6551, GNorm = 0.3465, lr_0 = 1.8630e-04
Loss = 2.5375e-03, PNorm = 171.6607, GNorm = 0.1592, lr_0 = 1.8617e-04
Loss = 2.1522e-03, PNorm = 171.6650, GNorm = 0.1243, lr_0 = 1.8604e-04
Loss = 2.6901e-03, PNorm = 171.6681, GNorm = 0.0880, lr_0 = 1.8592e-04
Loss = 1.7322e-03, PNorm = 171.6715, GNorm = 0.1369, lr_0 = 1.8579e-04
Loss = 1.6625e-03, PNorm = 171.6744, GNorm = 0.0849, lr_0 = 1.8566e-04
Loss = 1.5392e-03, PNorm = 171.6790, GNorm = 0.1039, lr_0 = 1.8553e-04
Loss = 3.2356e-03, PNorm = 171.6838, GNorm = 0.0940, lr_0 = 1.8541e-04
Loss = 2.3901e-03, PNorm = 171.6864, GNorm = 0.0943, lr_0 = 1.8528e-04
Loss = 2.3644e-03, PNorm = 171.6880, GNorm = 0.0949, lr_0 = 1.8515e-04
Loss = 2.4787e-03, PNorm = 171.6923, GNorm = 0.0614, lr_0 = 1.8503e-04
Loss = 3.2087e-03, PNorm = 171.6959, GNorm = 0.1047, lr_0 = 1.8490e-04
Loss = 2.4817e-03, PNorm = 171.6997, GNorm = 0.2219, lr_0 = 1.8477e-04
Loss = 3.1738e-03, PNorm = 171.7028, GNorm = 0.0786, lr_0 = 1.8465e-04
Loss = 1.7900e-03, PNorm = 171.7072, GNorm = 0.1164, lr_0 = 1.8452e-04
Loss = 2.7403e-03, PNorm = 171.7111, GNorm = 0.0643, lr_0 = 1.8439e-04
Loss = 1.6558e-03, PNorm = 171.7148, GNorm = 0.1026, lr_0 = 1.8427e-04
Loss = 1.6670e-03, PNorm = 171.7191, GNorm = 0.2237, lr_0 = 1.8414e-04
Loss = 1.4634e-03, PNorm = 171.7217, GNorm = 0.1529, lr_0 = 1.8401e-04
Loss = 3.8564e-03, PNorm = 171.7239, GNorm = 0.0977, lr_0 = 1.8389e-04
Loss = 1.5648e-03, PNorm = 171.7281, GNorm = 0.0575, lr_0 = 1.8376e-04
Loss = 2.2509e-03, PNorm = 171.7317, GNorm = 0.2504, lr_0 = 1.8364e-04
Loss = 1.6619e-03, PNorm = 171.7360, GNorm = 0.0565, lr_0 = 1.8351e-04
Loss = 1.8364e-03, PNorm = 171.7397, GNorm = 0.0993, lr_0 = 1.8338e-04
Loss = 1.5660e-03, PNorm = 171.7422, GNorm = 0.1048, lr_0 = 1.8326e-04
Loss = 3.9492e-03, PNorm = 171.7465, GNorm = 0.1110, lr_0 = 1.8313e-04
Loss = 1.3152e-03, PNorm = 171.7505, GNorm = 0.2090, lr_0 = 1.8301e-04
Loss = 3.0140e-03, PNorm = 171.7550, GNorm = 0.3682, lr_0 = 1.8288e-04
Loss = 2.0648e-03, PNorm = 171.7584, GNorm = 0.1557, lr_0 = 1.8276e-04
Loss = 3.4771e-03, PNorm = 171.7596, GNorm = 0.1925, lr_0 = 1.8263e-04
Loss = 1.7435e-03, PNorm = 171.7623, GNorm = 0.1299, lr_0 = 1.8251e-04
Loss = 1.6439e-03, PNorm = 171.7668, GNorm = 0.0704, lr_0 = 1.8238e-04
Loss = 2.1320e-03, PNorm = 171.7706, GNorm = 0.2303, lr_0 = 1.8226e-04
Loss = 1.8325e-03, PNorm = 171.7751, GNorm = 0.0994, lr_0 = 1.8213e-04
Loss = 3.2564e-03, PNorm = 171.7817, GNorm = 0.1091, lr_0 = 1.8201e-04
Loss = 2.1748e-03, PNorm = 171.7881, GNorm = 0.1295, lr_0 = 1.8188e-04
Loss = 1.8039e-03, PNorm = 171.7925, GNorm = 0.1405, lr_0 = 1.8176e-04
Loss = 1.4090e-03, PNorm = 171.7963, GNorm = 0.1165, lr_0 = 1.8163e-04
Loss = 2.2427e-03, PNorm = 171.8007, GNorm = 0.1428, lr_0 = 1.8151e-04
Loss = 3.8362e-03, PNorm = 171.8051, GNorm = 0.2388, lr_0 = 1.8138e-04
Loss = 3.6354e-03, PNorm = 171.8094, GNorm = 0.5329, lr_0 = 1.8126e-04
Loss = 4.9250e-03, PNorm = 171.8119, GNorm = 0.0483, lr_0 = 1.8114e-04
Loss = 3.4119e-03, PNorm = 171.8152, GNorm = 0.0782, lr_0 = 1.8101e-04
Loss = 1.7093e-03, PNorm = 171.8198, GNorm = 0.0576, lr_0 = 1.8089e-04
Loss = 2.0741e-03, PNorm = 171.8246, GNorm = 0.0734, lr_0 = 1.8076e-04
Loss = 2.0479e-03, PNorm = 171.8282, GNorm = 0.1000, lr_0 = 1.8064e-04
Loss = 1.9303e-03, PNorm = 171.8329, GNorm = 0.2141, lr_0 = 1.8052e-04
Loss = 2.8162e-03, PNorm = 171.8367, GNorm = 0.0939, lr_0 = 1.8039e-04
Loss = 1.5469e-03, PNorm = 171.8394, GNorm = 0.1481, lr_0 = 1.8027e-04
Loss = 1.8634e-03, PNorm = 171.8411, GNorm = 0.2978, lr_0 = 1.8015e-04
Loss = 1.5834e-03, PNorm = 171.8427, GNorm = 0.1553, lr_0 = 1.8002e-04
Loss = 1.6086e-03, PNorm = 171.8470, GNorm = 0.1153, lr_0 = 1.7990e-04
Loss = 2.3559e-03, PNorm = 171.8513, GNorm = 0.0323, lr_0 = 1.7978e-04
Loss = 1.6838e-03, PNorm = 171.8555, GNorm = 0.0479, lr_0 = 1.7965e-04
Loss = 2.3563e-03, PNorm = 171.8590, GNorm = 0.0906, lr_0 = 1.7953e-04
Loss = 1.4060e-03, PNorm = 171.8612, GNorm = 0.1397, lr_0 = 1.7941e-04
Loss = 3.8487e-03, PNorm = 171.8641, GNorm = 0.1517, lr_0 = 1.7928e-04
Loss = 2.1899e-03, PNorm = 171.8685, GNorm = 0.1737, lr_0 = 1.7916e-04
Loss = 3.8524e-03, PNorm = 171.8711, GNorm = 0.2252, lr_0 = 1.7904e-04
Loss = 4.2144e-03, PNorm = 171.8722, GNorm = 0.1468, lr_0 = 1.7892e-04
Loss = 3.7457e-03, PNorm = 171.8760, GNorm = 0.1907, lr_0 = 1.7879e-04
Loss = 1.5984e-03, PNorm = 171.8807, GNorm = 0.0969, lr_0 = 1.7867e-04
Loss = 1.8153e-03, PNorm = 171.8856, GNorm = 0.0646, lr_0 = 1.7855e-04
Loss = 2.4811e-03, PNorm = 171.8888, GNorm = 0.2752, lr_0 = 1.7843e-04
Loss = 3.0379e-03, PNorm = 171.8943, GNorm = 0.0857, lr_0 = 1.7830e-04
Loss = 1.3681e-03, PNorm = 171.8989, GNorm = 0.1207, lr_0 = 1.7818e-04
Loss = 3.3752e-03, PNorm = 171.9022, GNorm = 0.1000, lr_0 = 1.7806e-04
Loss = 2.7521e-03, PNorm = 171.9055, GNorm = 0.0876, lr_0 = 1.7794e-04
Loss = 2.3847e-03, PNorm = 171.9072, GNorm = 0.0771, lr_0 = 1.7782e-04
Validation mae = 0.277958
Epoch 23
Loss = 2.0212e-03, PNorm = 171.9100, GNorm = 0.1725, lr_0 = 1.7769e-04
Loss = 1.6794e-03, PNorm = 171.9115, GNorm = 0.1121, lr_0 = 1.7757e-04
Loss = 1.7942e-03, PNorm = 171.9134, GNorm = 0.1313, lr_0 = 1.7745e-04
Loss = 1.7638e-03, PNorm = 171.9152, GNorm = 0.1042, lr_0 = 1.7733e-04
Loss = 1.2614e-03, PNorm = 171.9174, GNorm = 0.0651, lr_0 = 1.7721e-04
Loss = 1.6351e-03, PNorm = 171.9200, GNorm = 0.1742, lr_0 = 1.7709e-04
Loss = 1.7635e-03, PNorm = 171.9243, GNorm = 0.1120, lr_0 = 1.7696e-04
Loss = 1.7230e-03, PNorm = 171.9277, GNorm = 0.0712, lr_0 = 1.7684e-04
Loss = 1.3063e-03, PNorm = 171.9286, GNorm = 0.0492, lr_0 = 1.7672e-04
Loss = 3.1516e-03, PNorm = 171.9311, GNorm = 0.0862, lr_0 = 1.7660e-04
Loss = 1.5552e-03, PNorm = 171.9341, GNorm = 0.1374, lr_0 = 1.7648e-04
Loss = 2.6472e-03, PNorm = 171.9372, GNorm = 0.3292, lr_0 = 1.7636e-04
Loss = 2.6168e-03, PNorm = 171.9387, GNorm = 0.2333, lr_0 = 1.7624e-04
Loss = 1.3925e-03, PNorm = 171.9415, GNorm = 0.1509, lr_0 = 1.7612e-04
Loss = 1.6039e-03, PNorm = 171.9458, GNorm = 0.0890, lr_0 = 1.7600e-04
Loss = 2.0251e-03, PNorm = 171.9494, GNorm = 0.3738, lr_0 = 1.7588e-04
Loss = 1.4735e-03, PNorm = 171.9502, GNorm = 0.0341, lr_0 = 1.7576e-04
Loss = 1.4105e-03, PNorm = 171.9537, GNorm = 0.0863, lr_0 = 1.7564e-04
Loss = 1.6322e-03, PNorm = 171.9585, GNorm = 0.0544, lr_0 = 1.7552e-04
Loss = 1.5715e-03, PNorm = 171.9637, GNorm = 0.0719, lr_0 = 1.7540e-04
Loss = 3.3274e-03, PNorm = 171.9651, GNorm = 0.0837, lr_0 = 1.7528e-04
Loss = 2.2755e-03, PNorm = 171.9671, GNorm = 0.1601, lr_0 = 1.7516e-04
Loss = 1.6050e-03, PNorm = 171.9687, GNorm = 0.0587, lr_0 = 1.7504e-04
Loss = 2.4181e-03, PNorm = 171.9720, GNorm = 0.0656, lr_0 = 1.7492e-04
Loss = 1.6960e-03, PNorm = 171.9749, GNorm = 0.0579, lr_0 = 1.7480e-04
Loss = 2.4150e-03, PNorm = 171.9790, GNorm = 0.2419, lr_0 = 1.7468e-04
Loss = 1.5714e-03, PNorm = 171.9830, GNorm = 0.2024, lr_0 = 1.7456e-04
Loss = 2.4510e-03, PNorm = 171.9866, GNorm = 0.2036, lr_0 = 1.7444e-04
Loss = 1.7241e-03, PNorm = 171.9896, GNorm = 0.1776, lr_0 = 1.7432e-04
Loss = 1.3646e-03, PNorm = 171.9929, GNorm = 0.0630, lr_0 = 1.7420e-04
Loss = 1.2775e-03, PNorm = 171.9973, GNorm = 0.1096, lr_0 = 1.7408e-04
Loss = 2.6407e-03, PNorm = 172.0013, GNorm = 0.7563, lr_0 = 1.7396e-04
Loss = 2.8308e-03, PNorm = 172.0031, GNorm = 0.1835, lr_0 = 1.7384e-04
Loss = 3.6127e-03, PNorm = 172.0069, GNorm = 0.1418, lr_0 = 1.7372e-04
Loss = 1.3343e-03, PNorm = 172.0097, GNorm = 0.1458, lr_0 = 1.7360e-04
Loss = 2.3100e-03, PNorm = 172.0133, GNorm = 0.0879, lr_0 = 1.7348e-04
Loss = 1.7965e-03, PNorm = 172.0166, GNorm = 0.0434, lr_0 = 1.7336e-04
Loss = 2.6096e-03, PNorm = 172.0192, GNorm = 0.1451, lr_0 = 1.7325e-04
Loss = 2.4871e-03, PNorm = 172.0200, GNorm = 0.0493, lr_0 = 1.7313e-04
Loss = 4.5809e-03, PNorm = 172.0231, GNorm = 0.4997, lr_0 = 1.7301e-04
Loss = 2.4664e-03, PNorm = 172.0277, GNorm = 0.1995, lr_0 = 1.7289e-04
Loss = 1.2217e-03, PNorm = 172.0311, GNorm = 0.0890, lr_0 = 1.7277e-04
Loss = 1.3886e-03, PNorm = 172.0351, GNorm = 0.0765, lr_0 = 1.7265e-04
Loss = 2.4221e-03, PNorm = 172.0390, GNorm = 0.0938, lr_0 = 1.7253e-04
Loss = 1.2299e-03, PNorm = 172.0425, GNorm = 0.2103, lr_0 = 1.7242e-04
Loss = 2.4760e-03, PNorm = 172.0452, GNorm = 0.0980, lr_0 = 1.7230e-04
Loss = 1.8065e-03, PNorm = 172.0481, GNorm = 0.1708, lr_0 = 1.7218e-04
Loss = 2.8998e-03, PNorm = 172.0502, GNorm = 0.0440, lr_0 = 1.7206e-04
Loss = 2.4579e-03, PNorm = 172.0529, GNorm = 0.2249, lr_0 = 1.7194e-04
Loss = 1.8596e-03, PNorm = 172.0573, GNorm = 0.0707, lr_0 = 1.7183e-04
Loss = 5.7575e-03, PNorm = 172.0609, GNorm = 0.1317, lr_0 = 1.7171e-04
Loss = 1.7124e-03, PNorm = 172.0633, GNorm = 0.0593, lr_0 = 1.7159e-04
Loss = 1.3624e-03, PNorm = 172.0664, GNorm = 0.1589, lr_0 = 1.7147e-04
Loss = 1.3649e-03, PNorm = 172.0696, GNorm = 0.1610, lr_0 = 1.7136e-04
Loss = 2.0561e-03, PNorm = 172.0734, GNorm = 0.1547, lr_0 = 1.7124e-04
Loss = 1.7606e-03, PNorm = 172.0771, GNorm = 0.1258, lr_0 = 1.7112e-04
Loss = 1.6999e-03, PNorm = 172.0796, GNorm = 0.2045, lr_0 = 1.7100e-04
Loss = 2.8245e-03, PNorm = 172.0823, GNorm = 0.2677, lr_0 = 1.7089e-04
Loss = 1.6702e-03, PNorm = 172.0858, GNorm = 0.1167, lr_0 = 1.7077e-04
Loss = 1.5607e-03, PNorm = 172.0889, GNorm = 0.2037, lr_0 = 1.7065e-04
Loss = 2.0311e-03, PNorm = 172.0916, GNorm = 0.0614, lr_0 = 1.7054e-04
Loss = 4.8997e-03, PNorm = 172.0957, GNorm = 0.0938, lr_0 = 1.7042e-04
Loss = 1.8153e-03, PNorm = 172.0986, GNorm = 0.1037, lr_0 = 1.7030e-04
Loss = 2.7458e-03, PNorm = 172.0997, GNorm = 0.3156, lr_0 = 1.7019e-04
Loss = 3.4264e-03, PNorm = 172.1024, GNorm = 0.1176, lr_0 = 1.7007e-04
Loss = 1.3446e-03, PNorm = 172.1048, GNorm = 0.0805, lr_0 = 1.6995e-04
Loss = 1.4483e-03, PNorm = 172.1091, GNorm = 0.0903, lr_0 = 1.6984e-04
Loss = 4.1077e-03, PNorm = 172.1116, GNorm = 0.1356, lr_0 = 1.6972e-04
Loss = 2.6542e-03, PNorm = 172.1150, GNorm = 0.0767, lr_0 = 1.6960e-04
Loss = 4.4188e-03, PNorm = 172.1191, GNorm = 0.0817, lr_0 = 1.6949e-04
Loss = 1.3951e-03, PNorm = 172.1236, GNorm = 0.1009, lr_0 = 1.6937e-04
Loss = 1.6594e-03, PNorm = 172.1285, GNorm = 0.0446, lr_0 = 1.6926e-04
Loss = 2.2296e-03, PNorm = 172.1323, GNorm = 0.1432, lr_0 = 1.6914e-04
Loss = 3.4244e-03, PNorm = 172.1355, GNorm = 0.2694, lr_0 = 1.6902e-04
Loss = 1.8460e-03, PNorm = 172.1380, GNorm = 0.0634, lr_0 = 1.6891e-04
Loss = 2.2402e-03, PNorm = 172.1411, GNorm = 0.1033, lr_0 = 1.6879e-04
Loss = 1.3401e-03, PNorm = 172.1444, GNorm = 0.0773, lr_0 = 1.6868e-04
Loss = 3.6102e-03, PNorm = 172.1486, GNorm = 0.4059, lr_0 = 1.6856e-04
Loss = 3.5406e-03, PNorm = 172.1526, GNorm = 0.2546, lr_0 = 1.6845e-04
Loss = 1.9521e-03, PNorm = 172.1571, GNorm = 0.1524, lr_0 = 1.6833e-04
Loss = 1.3918e-03, PNorm = 172.1596, GNorm = 0.0828, lr_0 = 1.6821e-04
Loss = 2.1175e-03, PNorm = 172.1634, GNorm = 0.1043, lr_0 = 1.6810e-04
Loss = 1.6602e-03, PNorm = 172.1667, GNorm = 0.1713, lr_0 = 1.6798e-04
Loss = 2.9619e-03, PNorm = 172.1693, GNorm = 0.2250, lr_0 = 1.6787e-04
Loss = 1.3388e-03, PNorm = 172.1702, GNorm = 0.0753, lr_0 = 1.6775e-04
Loss = 1.7459e-03, PNorm = 172.1721, GNorm = 0.0676, lr_0 = 1.6764e-04
Loss = 1.3677e-03, PNorm = 172.1745, GNorm = 0.2750, lr_0 = 1.6752e-04
Loss = 1.7720e-03, PNorm = 172.1776, GNorm = 0.1043, lr_0 = 1.6741e-04
Loss = 3.7675e-03, PNorm = 172.1825, GNorm = 0.1078, lr_0 = 1.6729e-04
Loss = 1.9222e-03, PNorm = 172.1875, GNorm = 0.0633, lr_0 = 1.6718e-04
Loss = 3.9115e-03, PNorm = 172.1913, GNorm = 0.2414, lr_0 = 1.6707e-04
Loss = 1.3176e-03, PNorm = 172.1940, GNorm = 0.1477, lr_0 = 1.6695e-04
Loss = 1.8310e-03, PNorm = 172.1975, GNorm = 0.0502, lr_0 = 1.6684e-04
Loss = 1.2354e-03, PNorm = 172.2013, GNorm = 0.2867, lr_0 = 1.6672e-04
Loss = 1.2833e-03, PNorm = 172.2043, GNorm = 0.1677, lr_0 = 1.6661e-04
Loss = 2.4202e-03, PNorm = 172.2055, GNorm = 0.3460, lr_0 = 1.6649e-04
Loss = 2.0120e-03, PNorm = 172.2086, GNorm = 0.3316, lr_0 = 1.6638e-04
Loss = 1.4336e-03, PNorm = 172.2126, GNorm = 0.2915, lr_0 = 1.6627e-04
Loss = 1.7170e-03, PNorm = 172.2157, GNorm = 0.0716, lr_0 = 1.6615e-04
Loss = 1.5443e-03, PNorm = 172.2188, GNorm = 0.1473, lr_0 = 1.6604e-04
Loss = 3.0746e-03, PNorm = 172.2208, GNorm = 0.1602, lr_0 = 1.6592e-04
Loss = 1.4762e-03, PNorm = 172.2243, GNorm = 0.2136, lr_0 = 1.6581e-04
Loss = 1.9593e-03, PNorm = 172.2280, GNorm = 0.2943, lr_0 = 1.6570e-04
Loss = 3.3322e-03, PNorm = 172.2322, GNorm = 0.0718, lr_0 = 1.6558e-04
Loss = 2.7396e-03, PNorm = 172.2354, GNorm = 0.1411, lr_0 = 1.6547e-04
Loss = 2.8434e-03, PNorm = 172.2380, GNorm = 0.1183, lr_0 = 1.6536e-04
Loss = 2.5486e-03, PNorm = 172.2417, GNorm = 0.1794, lr_0 = 1.6524e-04
Loss = 1.4230e-03, PNorm = 172.2447, GNorm = 0.1085, lr_0 = 1.6513e-04
Loss = 1.2018e-03, PNorm = 172.2481, GNorm = 0.1171, lr_0 = 1.6502e-04
Loss = 2.3617e-03, PNorm = 172.2519, GNorm = 0.0388, lr_0 = 1.6490e-04
Loss = 1.1248e-03, PNorm = 172.2551, GNorm = 0.0719, lr_0 = 1.6479e-04
Loss = 1.3546e-03, PNorm = 172.2572, GNorm = 0.0963, lr_0 = 1.6468e-04
Loss = 1.4817e-03, PNorm = 172.2598, GNorm = 0.1926, lr_0 = 1.6457e-04
Loss = 2.7525e-03, PNorm = 172.2619, GNorm = 0.1710, lr_0 = 1.6445e-04
Loss = 1.2687e-03, PNorm = 172.2658, GNorm = 0.0644, lr_0 = 1.6434e-04
Loss = 1.2252e-03, PNorm = 172.2684, GNorm = 0.0908, lr_0 = 1.6423e-04
Loss = 2.4649e-03, PNorm = 172.2692, GNorm = 0.1577, lr_0 = 1.6412e-04
Loss = 1.5611e-03, PNorm = 172.2724, GNorm = 0.1247, lr_0 = 1.6400e-04
Loss = 2.0877e-03, PNorm = 172.2766, GNorm = 0.1064, lr_0 = 1.6389e-04
Loss = 1.8344e-03, PNorm = 172.2779, GNorm = 0.2243, lr_0 = 1.6378e-04
Validation mae = 0.278284
Epoch 24
Loss = 1.9368e-03, PNorm = 172.2796, GNorm = 0.0456, lr_0 = 1.6367e-04
Loss = 1.3607e-03, PNorm = 172.2814, GNorm = 0.0671, lr_0 = 1.6355e-04
Loss = 2.2163e-03, PNorm = 172.2850, GNorm = 0.0510, lr_0 = 1.6344e-04
Loss = 1.1464e-03, PNorm = 172.2879, GNorm = 0.0782, lr_0 = 1.6333e-04
Loss = 1.6565e-03, PNorm = 172.2890, GNorm = 0.0730, lr_0 = 1.6322e-04
Loss = 1.4494e-03, PNorm = 172.2932, GNorm = 0.1046, lr_0 = 1.6311e-04
Loss = 1.1270e-03, PNorm = 172.2970, GNorm = 0.0737, lr_0 = 1.6299e-04
Loss = 2.2666e-03, PNorm = 172.2976, GNorm = 0.1690, lr_0 = 1.6288e-04
Loss = 2.7946e-03, PNorm = 172.2986, GNorm = 0.1379, lr_0 = 1.6277e-04
Loss = 1.3465e-03, PNorm = 172.3004, GNorm = 0.1519, lr_0 = 1.6266e-04
Loss = 2.0158e-03, PNorm = 172.3037, GNorm = 0.3250, lr_0 = 1.6255e-04
Loss = 6.3464e-03, PNorm = 172.3050, GNorm = 0.4205, lr_0 = 1.6244e-04
Loss = 1.2934e-03, PNorm = 172.3077, GNorm = 0.0623, lr_0 = 1.6233e-04
Loss = 1.6242e-03, PNorm = 172.3099, GNorm = 0.0954, lr_0 = 1.6221e-04
Loss = 1.2143e-03, PNorm = 172.3121, GNorm = 0.0577, lr_0 = 1.6210e-04
Loss = 2.1071e-03, PNorm = 172.3128, GNorm = 0.1127, lr_0 = 1.6199e-04
Loss = 1.6511e-03, PNorm = 172.3145, GNorm = 0.1033, lr_0 = 1.6188e-04
Loss = 1.6744e-03, PNorm = 172.3182, GNorm = 0.0733, lr_0 = 1.6177e-04
Loss = 3.6298e-03, PNorm = 172.3196, GNorm = 0.1627, lr_0 = 1.6166e-04
Loss = 1.5756e-03, PNorm = 172.3225, GNorm = 0.2456, lr_0 = 1.6155e-04
Loss = 1.3357e-03, PNorm = 172.3240, GNorm = 0.1674, lr_0 = 1.6144e-04
Loss = 1.5274e-03, PNorm = 172.3264, GNorm = 0.2457, lr_0 = 1.6133e-04
Loss = 2.8106e-03, PNorm = 172.3267, GNorm = 0.1701, lr_0 = 1.6122e-04
Loss = 1.9984e-03, PNorm = 172.3304, GNorm = 0.0710, lr_0 = 1.6111e-04
Loss = 3.3734e-03, PNorm = 172.3327, GNorm = 0.1350, lr_0 = 1.6100e-04
Loss = 1.4741e-03, PNorm = 172.3368, GNorm = 0.1589, lr_0 = 1.6089e-04
Loss = 3.0561e-03, PNorm = 172.3404, GNorm = 0.1725, lr_0 = 1.6078e-04
Loss = 1.9780e-03, PNorm = 172.3452, GNorm = 0.1227, lr_0 = 1.6067e-04
Loss = 1.5651e-03, PNorm = 172.3492, GNorm = 0.0970, lr_0 = 1.6056e-04
Loss = 1.5103e-03, PNorm = 172.3522, GNorm = 0.0674, lr_0 = 1.6045e-04
Loss = 1.9062e-03, PNorm = 172.3541, GNorm = 0.1500, lr_0 = 1.6034e-04
Loss = 2.0592e-03, PNorm = 172.3546, GNorm = 0.1241, lr_0 = 1.6023e-04
Loss = 1.9610e-03, PNorm = 172.3570, GNorm = 0.1485, lr_0 = 1.6012e-04
Loss = 1.2679e-03, PNorm = 172.3608, GNorm = 0.0812, lr_0 = 1.6001e-04
Loss = 1.3405e-03, PNorm = 172.3650, GNorm = 0.1612, lr_0 = 1.5990e-04
Loss = 1.2020e-03, PNorm = 172.3668, GNorm = 0.0970, lr_0 = 1.5979e-04
Loss = 1.3499e-03, PNorm = 172.3699, GNorm = 0.1645, lr_0 = 1.5968e-04
Loss = 1.3582e-03, PNorm = 172.3705, GNorm = 0.0329, lr_0 = 1.5957e-04
Loss = 1.6712e-03, PNorm = 172.3727, GNorm = 0.0386, lr_0 = 1.5946e-04
Loss = 1.1471e-03, PNorm = 172.3735, GNorm = 0.1793, lr_0 = 1.5935e-04
Loss = 1.9622e-03, PNorm = 172.3749, GNorm = 0.2957, lr_0 = 1.5924e-04
Loss = 3.3604e-03, PNorm = 172.3775, GNorm = 0.0857, lr_0 = 1.5913e-04
Loss = 1.3624e-03, PNorm = 172.3788, GNorm = 0.2086, lr_0 = 1.5902e-04
Loss = 2.0890e-03, PNorm = 172.3820, GNorm = 0.0590, lr_0 = 1.5891e-04
Loss = 1.3625e-03, PNorm = 172.3872, GNorm = 0.1715, lr_0 = 1.5880e-04
Loss = 1.3769e-03, PNorm = 172.3889, GNorm = 0.0447, lr_0 = 1.5870e-04
Loss = 1.2238e-03, PNorm = 172.3901, GNorm = 0.0673, lr_0 = 1.5859e-04
Loss = 1.9659e-03, PNorm = 172.3934, GNorm = 0.1646, lr_0 = 1.5848e-04
Loss = 1.5757e-03, PNorm = 172.3971, GNorm = 0.0854, lr_0 = 1.5837e-04
Loss = 1.9381e-03, PNorm = 172.3992, GNorm = 0.1733, lr_0 = 1.5826e-04
Loss = 1.4829e-03, PNorm = 172.4026, GNorm = 0.0697, lr_0 = 1.5815e-04
Loss = 2.0737e-03, PNorm = 172.4053, GNorm = 0.0449, lr_0 = 1.5804e-04
Loss = 3.0558e-03, PNorm = 172.4059, GNorm = 0.2926, lr_0 = 1.5794e-04
Loss = 1.4419e-03, PNorm = 172.4079, GNorm = 0.2163, lr_0 = 1.5783e-04
Loss = 1.5374e-03, PNorm = 172.4096, GNorm = 0.2126, lr_0 = 1.5772e-04
Loss = 1.7180e-03, PNorm = 172.4130, GNorm = 0.0395, lr_0 = 1.5761e-04
Loss = 2.3776e-03, PNorm = 172.4163, GNorm = 0.0672, lr_0 = 1.5750e-04
Loss = 1.7273e-03, PNorm = 172.4196, GNorm = 0.1619, lr_0 = 1.5740e-04
Loss = 1.0972e-03, PNorm = 172.4218, GNorm = 0.1208, lr_0 = 1.5729e-04
Loss = 1.2076e-03, PNorm = 172.4243, GNorm = 0.1130, lr_0 = 1.5718e-04
Loss = 1.1855e-03, PNorm = 172.4272, GNorm = 0.1237, lr_0 = 1.5707e-04
Loss = 1.2290e-03, PNorm = 172.4295, GNorm = 0.0534, lr_0 = 1.5697e-04
Loss = 1.0729e-03, PNorm = 172.4339, GNorm = 0.0981, lr_0 = 1.5686e-04
Loss = 1.2834e-03, PNorm = 172.4360, GNorm = 0.0442, lr_0 = 1.5675e-04
Loss = 1.5946e-03, PNorm = 172.4363, GNorm = 0.1074, lr_0 = 1.5664e-04
Loss = 1.1824e-03, PNorm = 172.4394, GNorm = 0.1483, lr_0 = 1.5654e-04
Loss = 3.9197e-03, PNorm = 172.4434, GNorm = 0.0681, lr_0 = 1.5643e-04
Loss = 1.1139e-03, PNorm = 172.4489, GNorm = 0.0382, lr_0 = 1.5632e-04
Loss = 1.6016e-03, PNorm = 172.4547, GNorm = 0.2033, lr_0 = 1.5621e-04
Loss = 3.4742e-03, PNorm = 172.4589, GNorm = 0.0699, lr_0 = 1.5611e-04
Loss = 2.2712e-03, PNorm = 172.4599, GNorm = 0.1479, lr_0 = 1.5600e-04
Loss = 1.2833e-03, PNorm = 172.4610, GNorm = 0.0610, lr_0 = 1.5589e-04
Loss = 1.3638e-03, PNorm = 172.4615, GNorm = 0.1549, lr_0 = 1.5579e-04
Loss = 2.0606e-03, PNorm = 172.4640, GNorm = 0.0506, lr_0 = 1.5568e-04
Loss = 2.1657e-03, PNorm = 172.4681, GNorm = 0.1377, lr_0 = 1.5557e-04
Loss = 1.5783e-03, PNorm = 172.4716, GNorm = 0.0545, lr_0 = 1.5547e-04
Loss = 1.8782e-03, PNorm = 172.4751, GNorm = 0.2151, lr_0 = 1.5536e-04
Loss = 1.4684e-03, PNorm = 172.4781, GNorm = 0.0805, lr_0 = 1.5525e-04
Loss = 2.1459e-03, PNorm = 172.4816, GNorm = 0.0886, lr_0 = 1.5515e-04
Loss = 2.7678e-03, PNorm = 172.4862, GNorm = 0.0869, lr_0 = 1.5504e-04
Loss = 1.1236e-03, PNorm = 172.4887, GNorm = 0.0720, lr_0 = 1.5493e-04
Loss = 2.9894e-03, PNorm = 172.4899, GNorm = 0.1461, lr_0 = 1.5483e-04
Loss = 1.1584e-03, PNorm = 172.4926, GNorm = 0.1638, lr_0 = 1.5472e-04
Loss = 2.6688e-03, PNorm = 172.4932, GNorm = 0.1230, lr_0 = 1.5462e-04
Loss = 1.4874e-03, PNorm = 172.4949, GNorm = 0.0783, lr_0 = 1.5451e-04
Loss = 1.8121e-03, PNorm = 172.4960, GNorm = 0.0751, lr_0 = 1.5440e-04
Loss = 1.3308e-03, PNorm = 172.4979, GNorm = 0.0898, lr_0 = 1.5430e-04
Loss = 1.3487e-03, PNorm = 172.4991, GNorm = 0.0482, lr_0 = 1.5419e-04
Loss = 1.1538e-03, PNorm = 172.4996, GNorm = 0.1714, lr_0 = 1.5409e-04
Loss = 3.9304e-03, PNorm = 172.5018, GNorm = 0.3199, lr_0 = 1.5398e-04
Loss = 2.0416e-03, PNorm = 172.5049, GNorm = 0.0724, lr_0 = 1.5388e-04
Loss = 1.4901e-03, PNorm = 172.5102, GNorm = 0.0615, lr_0 = 1.5377e-04
Loss = 1.5540e-03, PNorm = 172.5134, GNorm = 0.1237, lr_0 = 1.5367e-04
Loss = 1.5274e-03, PNorm = 172.5169, GNorm = 0.1025, lr_0 = 1.5356e-04
Loss = 1.5568e-03, PNorm = 172.5207, GNorm = 0.0386, lr_0 = 1.5346e-04
Loss = 8.7259e-04, PNorm = 172.5239, GNorm = 0.0600, lr_0 = 1.5335e-04
Loss = 2.1582e-03, PNorm = 172.5268, GNorm = 0.0250, lr_0 = 1.5325e-04
Loss = 2.6941e-03, PNorm = 172.5289, GNorm = 0.1375, lr_0 = 1.5314e-04
Loss = 5.0416e-03, PNorm = 172.5295, GNorm = 0.4069, lr_0 = 1.5304e-04
Loss = 1.9841e-03, PNorm = 172.5309, GNorm = 0.1770, lr_0 = 1.5293e-04
Loss = 1.9948e-03, PNorm = 172.5336, GNorm = 0.0709, lr_0 = 1.5283e-04
Loss = 2.4906e-03, PNorm = 172.5384, GNorm = 0.0624, lr_0 = 1.5272e-04
Loss = 1.5795e-03, PNorm = 172.5423, GNorm = 0.1986, lr_0 = 1.5262e-04
Loss = 2.0093e-03, PNorm = 172.5450, GNorm = 0.0969, lr_0 = 1.5251e-04
Loss = 2.2849e-03, PNorm = 172.5472, GNorm = 0.0934, lr_0 = 1.5241e-04
Loss = 2.7087e-03, PNorm = 172.5510, GNorm = 0.2334, lr_0 = 1.5230e-04
Loss = 1.3755e-03, PNorm = 172.5563, GNorm = 0.0420, lr_0 = 1.5220e-04
Loss = 3.2263e-03, PNorm = 172.5592, GNorm = 0.0537, lr_0 = 1.5209e-04
Loss = 1.3255e-03, PNorm = 172.5619, GNorm = 0.1397, lr_0 = 1.5199e-04
Loss = 3.6179e-03, PNorm = 172.5646, GNorm = 0.2460, lr_0 = 1.5189e-04
Loss = 1.1361e-03, PNorm = 172.5676, GNorm = 0.0720, lr_0 = 1.5178e-04
Loss = 1.5198e-03, PNorm = 172.5695, GNorm = 0.1101, lr_0 = 1.5168e-04
Loss = 1.8809e-03, PNorm = 172.5731, GNorm = 0.0794, lr_0 = 1.5157e-04
Loss = 1.8358e-03, PNorm = 172.5755, GNorm = 0.0412, lr_0 = 1.5147e-04
Loss = 2.5030e-03, PNorm = 172.5780, GNorm = 0.1305, lr_0 = 1.5137e-04
Loss = 1.3885e-03, PNorm = 172.5783, GNorm = 0.1050, lr_0 = 1.5126e-04
Loss = 1.3294e-03, PNorm = 172.5794, GNorm = 0.0637, lr_0 = 1.5116e-04
Loss = 1.2615e-03, PNorm = 172.5815, GNorm = 0.0835, lr_0 = 1.5106e-04
Loss = 1.9456e-03, PNorm = 172.5853, GNorm = 0.0658, lr_0 = 1.5095e-04
Loss = 2.9522e-03, PNorm = 172.5904, GNorm = 0.2098, lr_0 = 1.5085e-04
Validation mae = 0.277746
Epoch 25
Loss = 1.6950e-03, PNorm = 172.5923, GNorm = 0.1430, lr_0 = 1.5075e-04
Loss = 2.2479e-03, PNorm = 172.5957, GNorm = 0.0946, lr_0 = 1.5064e-04
Loss = 1.3821e-03, PNorm = 172.5956, GNorm = 0.0623, lr_0 = 1.5054e-04
Loss = 1.3487e-03, PNorm = 172.5966, GNorm = 0.0615, lr_0 = 1.5044e-04
Loss = 1.3138e-03, PNorm = 172.5979, GNorm = 0.0917, lr_0 = 1.5033e-04
Loss = 9.7573e-04, PNorm = 172.5997, GNorm = 0.0814, lr_0 = 1.5023e-04
Loss = 1.3692e-03, PNorm = 172.6022, GNorm = 0.0717, lr_0 = 1.5013e-04
Loss = 1.3093e-03, PNorm = 172.6037, GNorm = 0.1444, lr_0 = 1.5002e-04
Loss = 1.1841e-03, PNorm = 172.6052, GNorm = 0.1693, lr_0 = 1.4992e-04
Loss = 1.3783e-03, PNorm = 172.6063, GNorm = 0.0944, lr_0 = 1.4982e-04
Loss = 1.2972e-03, PNorm = 172.6077, GNorm = 0.0787, lr_0 = 1.4972e-04
Loss = 1.5532e-03, PNorm = 172.6092, GNorm = 0.0331, lr_0 = 1.4961e-04
Loss = 1.8664e-03, PNorm = 172.6111, GNorm = 0.1680, lr_0 = 1.4951e-04
Loss = 1.4376e-03, PNorm = 172.6123, GNorm = 0.0558, lr_0 = 1.4941e-04
Loss = 2.1165e-03, PNorm = 172.6134, GNorm = 0.0749, lr_0 = 1.4931e-04
Loss = 1.2222e-03, PNorm = 172.6142, GNorm = 0.0630, lr_0 = 1.4920e-04
Loss = 9.8930e-04, PNorm = 172.6166, GNorm = 0.0708, lr_0 = 1.4910e-04
Loss = 1.1412e-03, PNorm = 172.6183, GNorm = 0.1036, lr_0 = 1.4900e-04
Loss = 1.7707e-03, PNorm = 172.6194, GNorm = 0.1637, lr_0 = 1.4890e-04
Loss = 1.0146e-03, PNorm = 172.6211, GNorm = 0.0472, lr_0 = 1.4880e-04
Loss = 1.0323e-03, PNorm = 172.6233, GNorm = 0.0903, lr_0 = 1.4869e-04
Loss = 1.5042e-03, PNorm = 172.6259, GNorm = 0.0950, lr_0 = 1.4859e-04
Loss = 1.8665e-03, PNorm = 172.6279, GNorm = 0.3193, lr_0 = 1.4849e-04
Loss = 1.8756e-03, PNorm = 172.6304, GNorm = 0.0503, lr_0 = 1.4839e-04
Loss = 1.3092e-03, PNorm = 172.6321, GNorm = 0.0684, lr_0 = 1.4829e-04
Loss = 9.8145e-04, PNorm = 172.6349, GNorm = 0.0922, lr_0 = 1.4818e-04
Loss = 1.2713e-03, PNorm = 172.6378, GNorm = 0.2138, lr_0 = 1.4808e-04
Loss = 8.3042e-04, PNorm = 172.6404, GNorm = 0.0402, lr_0 = 1.4798e-04
Loss = 1.8091e-03, PNorm = 172.6432, GNorm = 0.0760, lr_0 = 1.4788e-04
Loss = 9.2841e-04, PNorm = 172.6467, GNorm = 0.0411, lr_0 = 1.4778e-04
Loss = 1.9192e-03, PNorm = 172.6500, GNorm = 0.0797, lr_0 = 1.4768e-04
Loss = 1.6537e-03, PNorm = 172.6531, GNorm = 0.1151, lr_0 = 1.4758e-04
Loss = 1.2548e-03, PNorm = 172.6536, GNorm = 0.1272, lr_0 = 1.4748e-04
Loss = 1.6893e-03, PNorm = 172.6548, GNorm = 0.0875, lr_0 = 1.4737e-04
Loss = 2.1793e-03, PNorm = 172.6571, GNorm = 0.2069, lr_0 = 1.4727e-04
Loss = 8.7414e-04, PNorm = 172.6592, GNorm = 0.0490, lr_0 = 1.4717e-04
Loss = 1.2264e-03, PNorm = 172.6611, GNorm = 0.1732, lr_0 = 1.4707e-04
Loss = 9.4360e-04, PNorm = 172.6642, GNorm = 0.0901, lr_0 = 1.4697e-04
Loss = 2.1444e-03, PNorm = 172.6657, GNorm = 0.0920, lr_0 = 1.4687e-04
Loss = 9.3264e-04, PNorm = 172.6680, GNorm = 0.1535, lr_0 = 1.4677e-04
Loss = 1.7999e-03, PNorm = 172.6700, GNorm = 0.0845, lr_0 = 1.4667e-04
Loss = 1.5970e-03, PNorm = 172.6710, GNorm = 0.2474, lr_0 = 1.4657e-04
Loss = 1.4202e-03, PNorm = 172.6747, GNorm = 0.0330, lr_0 = 1.4647e-04
Loss = 3.1304e-03, PNorm = 172.6775, GNorm = 0.4002, lr_0 = 1.4637e-04
Loss = 1.8757e-03, PNorm = 172.6789, GNorm = 0.3249, lr_0 = 1.4627e-04
Loss = 1.0889e-03, PNorm = 172.6809, GNorm = 0.1903, lr_0 = 1.4617e-04
Loss = 1.1564e-03, PNorm = 172.6831, GNorm = 0.1128, lr_0 = 1.4607e-04
Loss = 3.9184e-03, PNorm = 172.6860, GNorm = 0.1759, lr_0 = 1.4597e-04
Loss = 9.7773e-04, PNorm = 172.6895, GNorm = 0.0407, lr_0 = 1.4587e-04
Loss = 1.6514e-03, PNorm = 172.6930, GNorm = 0.0339, lr_0 = 1.4577e-04
Loss = 1.9065e-03, PNorm = 172.6957, GNorm = 0.1638, lr_0 = 1.4567e-04
Loss = 4.4958e-03, PNorm = 172.6996, GNorm = 0.1447, lr_0 = 1.4557e-04
Loss = 1.2746e-03, PNorm = 172.7032, GNorm = 0.0495, lr_0 = 1.4547e-04
Loss = 1.9226e-03, PNorm = 172.7064, GNorm = 0.1096, lr_0 = 1.4537e-04
Loss = 9.3844e-04, PNorm = 172.7088, GNorm = 0.1045, lr_0 = 1.4527e-04
Loss = 1.7451e-03, PNorm = 172.7100, GNorm = 0.1084, lr_0 = 1.4517e-04
Loss = 1.5914e-03, PNorm = 172.7117, GNorm = 0.0844, lr_0 = 1.4507e-04
Loss = 1.0394e-03, PNorm = 172.7127, GNorm = 0.0562, lr_0 = 1.4497e-04
Loss = 1.7153e-03, PNorm = 172.7142, GNorm = 0.0595, lr_0 = 1.4487e-04
Loss = 4.2501e-03, PNorm = 172.7152, GNorm = 0.1464, lr_0 = 1.4477e-04
Loss = 1.4833e-03, PNorm = 172.7167, GNorm = 0.1375, lr_0 = 1.4467e-04
Loss = 1.6254e-03, PNorm = 172.7194, GNorm = 0.0543, lr_0 = 1.4457e-04
Loss = 1.2113e-03, PNorm = 172.7218, GNorm = 0.1237, lr_0 = 1.4447e-04
Loss = 1.4010e-03, PNorm = 172.7236, GNorm = 0.0435, lr_0 = 1.4438e-04
Loss = 1.3449e-03, PNorm = 172.7256, GNorm = 0.0736, lr_0 = 1.4428e-04
Loss = 1.3580e-03, PNorm = 172.7280, GNorm = 0.0909, lr_0 = 1.4418e-04
Loss = 2.6981e-03, PNorm = 172.7313, GNorm = 0.1354, lr_0 = 1.4408e-04
Loss = 1.9665e-03, PNorm = 172.7343, GNorm = 0.1480, lr_0 = 1.4398e-04
Loss = 1.0548e-03, PNorm = 172.7356, GNorm = 0.0635, lr_0 = 1.4388e-04
Loss = 1.8963e-03, PNorm = 172.7366, GNorm = 0.0567, lr_0 = 1.4378e-04
Loss = 1.8725e-03, PNorm = 172.7374, GNorm = 0.1413, lr_0 = 1.4368e-04
Loss = 1.0496e-03, PNorm = 172.7399, GNorm = 0.1468, lr_0 = 1.4359e-04
Loss = 1.1555e-03, PNorm = 172.7409, GNorm = 0.1261, lr_0 = 1.4349e-04
Loss = 1.3215e-03, PNorm = 172.7427, GNorm = 0.0463, lr_0 = 1.4339e-04
Loss = 1.7322e-03, PNorm = 172.7456, GNorm = 0.0473, lr_0 = 1.4329e-04
Loss = 1.3704e-03, PNorm = 172.7495, GNorm = 0.0434, lr_0 = 1.4319e-04
Loss = 2.4336e-03, PNorm = 172.7545, GNorm = 0.1444, lr_0 = 1.4310e-04
Loss = 1.2531e-03, PNorm = 172.7582, GNorm = 0.1142, lr_0 = 1.4300e-04
Loss = 3.0932e-03, PNorm = 172.7607, GNorm = 0.1221, lr_0 = 1.4290e-04
Loss = 1.1165e-03, PNorm = 172.7626, GNorm = 0.0734, lr_0 = 1.4280e-04
Loss = 1.3217e-03, PNorm = 172.7630, GNorm = 0.0367, lr_0 = 1.4270e-04
Loss = 1.3035e-03, PNorm = 172.7636, GNorm = 0.0407, lr_0 = 1.4261e-04
Loss = 3.4990e-03, PNorm = 172.7660, GNorm = 0.0552, lr_0 = 1.4251e-04
Loss = 2.1356e-03, PNorm = 172.7699, GNorm = 0.0407, lr_0 = 1.4241e-04
Loss = 1.0468e-03, PNorm = 172.7718, GNorm = 0.0790, lr_0 = 1.4231e-04
Loss = 2.0829e-03, PNorm = 172.7740, GNorm = 0.0857, lr_0 = 1.4222e-04
Loss = 1.7353e-03, PNorm = 172.7760, GNorm = 0.1965, lr_0 = 1.4212e-04
Loss = 3.4167e-03, PNorm = 172.7780, GNorm = 0.0724, lr_0 = 1.4202e-04
Loss = 3.6830e-03, PNorm = 172.7792, GNorm = 0.1058, lr_0 = 1.4192e-04
Loss = 2.2552e-03, PNorm = 172.7806, GNorm = 0.0518, lr_0 = 1.4183e-04
Loss = 1.2932e-03, PNorm = 172.7816, GNorm = 0.0759, lr_0 = 1.4173e-04
Loss = 1.7914e-03, PNorm = 172.7832, GNorm = 0.1773, lr_0 = 1.4163e-04
Loss = 3.7125e-03, PNorm = 172.7846, GNorm = 0.1460, lr_0 = 1.4153e-04
Loss = 1.7872e-03, PNorm = 172.7875, GNorm = 0.1257, lr_0 = 1.4144e-04
Loss = 9.4320e-04, PNorm = 172.7904, GNorm = 0.1257, lr_0 = 1.4134e-04
Loss = 1.9358e-03, PNorm = 172.7935, GNorm = 0.1677, lr_0 = 1.4124e-04
Loss = 2.9612e-03, PNorm = 172.7963, GNorm = 0.0511, lr_0 = 1.4115e-04
Loss = 1.1916e-03, PNorm = 172.7978, GNorm = 0.0992, lr_0 = 1.4105e-04
Loss = 8.7142e-04, PNorm = 172.8000, GNorm = 0.0877, lr_0 = 1.4095e-04
Loss = 1.3773e-03, PNorm = 172.8023, GNorm = 0.0256, lr_0 = 1.4086e-04
Loss = 1.7006e-03, PNorm = 172.8048, GNorm = 0.0971, lr_0 = 1.4076e-04
Loss = 2.1001e-03, PNorm = 172.8070, GNorm = 0.1968, lr_0 = 1.4066e-04
Loss = 1.0121e-03, PNorm = 172.8103, GNorm = 0.0512, lr_0 = 1.4057e-04
Loss = 2.0323e-03, PNorm = 172.8113, GNorm = 0.0593, lr_0 = 1.4047e-04
Loss = 1.5354e-03, PNorm = 172.8128, GNorm = 0.0810, lr_0 = 1.4038e-04
Loss = 1.6169e-03, PNorm = 172.8154, GNorm = 0.0920, lr_0 = 1.4028e-04
Loss = 1.4649e-03, PNorm = 172.8191, GNorm = 0.1443, lr_0 = 1.4018e-04
Loss = 9.7200e-04, PNorm = 172.8223, GNorm = 0.1023, lr_0 = 1.4009e-04
Loss = 1.1973e-03, PNorm = 172.8243, GNorm = 0.1151, lr_0 = 1.3999e-04
Loss = 1.0245e-03, PNorm = 172.8265, GNorm = 0.0564, lr_0 = 1.3990e-04
Loss = 2.2113e-03, PNorm = 172.8296, GNorm = 0.0413, lr_0 = 1.3980e-04
Loss = 1.7360e-03, PNorm = 172.8312, GNorm = 0.0867, lr_0 = 1.3970e-04
Loss = 2.7819e-03, PNorm = 172.8328, GNorm = 0.2799, lr_0 = 1.3961e-04
Loss = 3.5721e-03, PNorm = 172.8342, GNorm = 0.0439, lr_0 = 1.3951e-04
Loss = 9.2782e-04, PNorm = 172.8365, GNorm = 0.0602, lr_0 = 1.3942e-04
Loss = 1.8785e-03, PNorm = 172.8409, GNorm = 0.2294, lr_0 = 1.3932e-04
Loss = 1.2396e-03, PNorm = 172.8454, GNorm = 0.0377, lr_0 = 1.3923e-04
Loss = 1.3420e-03, PNorm = 172.8488, GNorm = 0.0932, lr_0 = 1.3913e-04
Loss = 2.8046e-03, PNorm = 172.8504, GNorm = 0.1728, lr_0 = 1.3904e-04
Loss = 1.1774e-03, PNorm = 172.8522, GNorm = 0.0485, lr_0 = 1.3894e-04
Validation mae = 0.278100
Epoch 26
Loss = 1.5723e-03, PNorm = 172.8537, GNorm = 0.0739, lr_0 = 1.3884e-04
Loss = 1.2280e-03, PNorm = 172.8550, GNorm = 0.0326, lr_0 = 1.3875e-04
Loss = 1.3483e-03, PNorm = 172.8552, GNorm = 0.0825, lr_0 = 1.3865e-04
Loss = 1.5499e-03, PNorm = 172.8565, GNorm = 0.1950, lr_0 = 1.3856e-04
Loss = 1.2350e-03, PNorm = 172.8582, GNorm = 0.1415, lr_0 = 1.3846e-04
Loss = 2.8300e-03, PNorm = 172.8609, GNorm = 0.1281, lr_0 = 1.3837e-04
Loss = 1.0925e-03, PNorm = 172.8624, GNorm = 0.0904, lr_0 = 1.3828e-04
Loss = 2.1076e-03, PNorm = 172.8641, GNorm = 0.1738, lr_0 = 1.3818e-04
Loss = 1.5629e-03, PNorm = 172.8654, GNorm = 0.1120, lr_0 = 1.3809e-04
Loss = 2.0696e-03, PNorm = 172.8679, GNorm = 0.1117, lr_0 = 1.3799e-04
Loss = 7.9224e-04, PNorm = 172.8693, GNorm = 0.0401, lr_0 = 1.3790e-04
Loss = 2.2631e-03, PNorm = 172.8703, GNorm = 0.1244, lr_0 = 1.3780e-04
Loss = 1.0357e-03, PNorm = 172.8706, GNorm = 0.0781, lr_0 = 1.3771e-04
Loss = 1.1803e-03, PNorm = 172.8727, GNorm = 0.0688, lr_0 = 1.3761e-04
Loss = 1.4534e-03, PNorm = 172.8750, GNorm = 0.0881, lr_0 = 1.3752e-04
Loss = 3.2183e-03, PNorm = 172.8766, GNorm = 0.1208, lr_0 = 1.3742e-04
Loss = 8.9449e-04, PNorm = 172.8765, GNorm = 0.0586, lr_0 = 1.3733e-04
Loss = 8.9782e-04, PNorm = 172.8782, GNorm = 0.1190, lr_0 = 1.3724e-04
Loss = 1.0400e-03, PNorm = 172.8804, GNorm = 0.0570, lr_0 = 1.3714e-04
Loss = 1.1416e-03, PNorm = 172.8818, GNorm = 0.0911, lr_0 = 1.3705e-04
Loss = 1.5566e-03, PNorm = 172.8805, GNorm = 0.0434, lr_0 = 1.3695e-04
Loss = 1.0765e-03, PNorm = 172.8816, GNorm = 0.0420, lr_0 = 1.3686e-04
Loss = 1.4525e-03, PNorm = 172.8834, GNorm = 0.1485, lr_0 = 1.3677e-04
Loss = 1.3552e-03, PNorm = 172.8866, GNorm = 0.0915, lr_0 = 1.3667e-04
Loss = 1.0491e-03, PNorm = 172.8881, GNorm = 0.1368, lr_0 = 1.3658e-04
Loss = 1.5064e-03, PNorm = 172.8891, GNorm = 0.0576, lr_0 = 1.3649e-04
Loss = 2.4067e-03, PNorm = 172.8922, GNorm = 0.2086, lr_0 = 1.3639e-04
Loss = 1.1842e-03, PNorm = 172.8952, GNorm = 0.1002, lr_0 = 1.3630e-04
Loss = 1.0602e-03, PNorm = 172.8969, GNorm = 0.0755, lr_0 = 1.3621e-04
Loss = 1.6997e-03, PNorm = 172.8980, GNorm = 0.0873, lr_0 = 1.3611e-04
Loss = 1.7303e-03, PNorm = 172.8988, GNorm = 0.1250, lr_0 = 1.3602e-04
Loss = 1.2487e-03, PNorm = 172.8987, GNorm = 0.0747, lr_0 = 1.3593e-04
Loss = 1.7765e-03, PNorm = 172.8980, GNorm = 0.0399, lr_0 = 1.3583e-04
Loss = 1.1865e-03, PNorm = 172.8987, GNorm = 0.0950, lr_0 = 1.3574e-04
Loss = 2.8213e-03, PNorm = 172.9001, GNorm = 0.0872, lr_0 = 1.3565e-04
Loss = 1.1879e-03, PNorm = 172.9008, GNorm = 0.1163, lr_0 = 1.3555e-04
Loss = 9.1505e-04, PNorm = 172.9031, GNorm = 0.0914, lr_0 = 1.3546e-04
Loss = 3.3382e-03, PNorm = 172.9062, GNorm = 0.1864, lr_0 = 1.3537e-04
Loss = 7.1993e-04, PNorm = 172.9096, GNorm = 0.0580, lr_0 = 1.3528e-04
Loss = 1.5607e-03, PNorm = 172.9136, GNorm = 0.0326, lr_0 = 1.3518e-04
Loss = 1.2651e-03, PNorm = 172.9150, GNorm = 0.3849, lr_0 = 1.3509e-04
Loss = 3.4277e-03, PNorm = 172.9168, GNorm = 0.1947, lr_0 = 1.3500e-04
Loss = 1.6210e-03, PNorm = 172.9187, GNorm = 0.0861, lr_0 = 1.3491e-04
Loss = 1.5618e-03, PNorm = 172.9193, GNorm = 0.0388, lr_0 = 1.3481e-04
Loss = 8.7599e-04, PNorm = 172.9212, GNorm = 0.0962, lr_0 = 1.3472e-04
Loss = 3.2380e-03, PNorm = 172.9222, GNorm = 0.2586, lr_0 = 1.3463e-04
Loss = 1.3530e-03, PNorm = 172.9254, GNorm = 0.0573, lr_0 = 1.3454e-04
Loss = 1.1533e-03, PNorm = 172.9273, GNorm = 0.0631, lr_0 = 1.3444e-04
Loss = 1.4345e-03, PNorm = 172.9290, GNorm = 0.0429, lr_0 = 1.3435e-04
Loss = 2.0181e-03, PNorm = 172.9323, GNorm = 0.2822, lr_0 = 1.3426e-04
Loss = 1.5190e-03, PNorm = 172.9357, GNorm = 0.0703, lr_0 = 1.3417e-04
Loss = 1.2843e-03, PNorm = 172.9373, GNorm = 0.1409, lr_0 = 1.3408e-04
Loss = 1.0871e-03, PNorm = 172.9389, GNorm = 0.2188, lr_0 = 1.3398e-04
Loss = 1.9455e-03, PNorm = 172.9390, GNorm = 0.3177, lr_0 = 1.3389e-04
Loss = 1.0783e-03, PNorm = 172.9403, GNorm = 0.0582, lr_0 = 1.3380e-04
Loss = 8.6516e-04, PNorm = 172.9413, GNorm = 0.2150, lr_0 = 1.3371e-04
Loss = 1.3568e-03, PNorm = 172.9423, GNorm = 0.0971, lr_0 = 1.3362e-04
Loss = 1.4798e-03, PNorm = 172.9445, GNorm = 0.0424, lr_0 = 1.3353e-04
Loss = 9.3296e-04, PNorm = 172.9474, GNorm = 0.0827, lr_0 = 1.3343e-04
Loss = 7.7885e-04, PNorm = 172.9503, GNorm = 0.0903, lr_0 = 1.3334e-04
Loss = 1.0996e-03, PNorm = 172.9525, GNorm = 0.1079, lr_0 = 1.3325e-04
Loss = 7.4422e-04, PNorm = 172.9548, GNorm = 0.0900, lr_0 = 1.3316e-04
Loss = 1.7050e-03, PNorm = 172.9572, GNorm = 0.0975, lr_0 = 1.3307e-04
Loss = 1.6222e-03, PNorm = 172.9591, GNorm = 0.1058, lr_0 = 1.3298e-04
Loss = 2.5772e-03, PNorm = 172.9614, GNorm = 0.0790, lr_0 = 1.3289e-04
Loss = 1.1067e-03, PNorm = 172.9640, GNorm = 0.0936, lr_0 = 1.3280e-04
Loss = 2.2946e-03, PNorm = 172.9651, GNorm = 0.2321, lr_0 = 1.3270e-04
Loss = 9.8421e-04, PNorm = 172.9676, GNorm = 0.1070, lr_0 = 1.3261e-04
Loss = 1.3532e-03, PNorm = 172.9695, GNorm = 0.0481, lr_0 = 1.3252e-04
Loss = 7.1803e-04, PNorm = 172.9705, GNorm = 0.0512, lr_0 = 1.3243e-04
Loss = 9.2228e-04, PNorm = 172.9721, GNorm = 0.0216, lr_0 = 1.3234e-04
Loss = 1.2918e-03, PNorm = 172.9744, GNorm = 0.2024, lr_0 = 1.3225e-04
Loss = 1.5411e-03, PNorm = 172.9764, GNorm = 0.1031, lr_0 = 1.3216e-04
Loss = 4.2013e-03, PNorm = 172.9782, GNorm = 0.4083, lr_0 = 1.3207e-04
Loss = 8.6040e-04, PNorm = 172.9780, GNorm = 0.0730, lr_0 = 1.3198e-04
Loss = 2.0414e-03, PNorm = 172.9798, GNorm = 0.1229, lr_0 = 1.3189e-04
Loss = 1.3915e-03, PNorm = 172.9809, GNorm = 0.1383, lr_0 = 1.3180e-04
Loss = 1.6539e-03, PNorm = 172.9836, GNorm = 0.1296, lr_0 = 1.3171e-04
Loss = 9.4084e-04, PNorm = 172.9848, GNorm = 0.1334, lr_0 = 1.3162e-04
Loss = 3.5523e-03, PNorm = 172.9876, GNorm = 0.1017, lr_0 = 1.3153e-04
Loss = 1.4161e-03, PNorm = 172.9887, GNorm = 0.0477, lr_0 = 1.3144e-04
Loss = 1.1876e-03, PNorm = 172.9921, GNorm = 0.0672, lr_0 = 1.3135e-04
Loss = 9.7642e-04, PNorm = 172.9941, GNorm = 0.0595, lr_0 = 1.3126e-04
Loss = 1.4314e-03, PNorm = 172.9953, GNorm = 0.0638, lr_0 = 1.3117e-04
Loss = 2.9479e-03, PNorm = 172.9979, GNorm = 0.1627, lr_0 = 1.3108e-04
Loss = 8.8900e-04, PNorm = 173.0000, GNorm = 0.1141, lr_0 = 1.3099e-04
Loss = 7.6994e-04, PNorm = 173.0036, GNorm = 0.1103, lr_0 = 1.3090e-04
Loss = 1.6280e-03, PNorm = 173.0082, GNorm = 0.0585, lr_0 = 1.3081e-04
Loss = 3.1133e-03, PNorm = 173.0111, GNorm = 0.1697, lr_0 = 1.3072e-04
Loss = 2.4534e-03, PNorm = 173.0136, GNorm = 0.0520, lr_0 = 1.3063e-04
Loss = 1.7320e-03, PNorm = 173.0145, GNorm = 0.0874, lr_0 = 1.3054e-04
Loss = 1.8042e-03, PNorm = 173.0160, GNorm = 0.1176, lr_0 = 1.3045e-04
Loss = 1.1766e-03, PNorm = 173.0186, GNorm = 0.0951, lr_0 = 1.3036e-04
Loss = 1.7705e-03, PNorm = 173.0202, GNorm = 0.0719, lr_0 = 1.3027e-04
Loss = 1.1163e-03, PNorm = 173.0223, GNorm = 0.0395, lr_0 = 1.3018e-04
Loss = 1.9428e-03, PNorm = 173.0252, GNorm = 0.2471, lr_0 = 1.3009e-04
Loss = 2.6445e-03, PNorm = 173.0264, GNorm = 0.0595, lr_0 = 1.3000e-04
Loss = 2.2353e-03, PNorm = 173.0284, GNorm = 0.1150, lr_0 = 1.2992e-04
Loss = 8.7201e-04, PNorm = 173.0305, GNorm = 0.0972, lr_0 = 1.2983e-04
Loss = 1.3111e-03, PNorm = 173.0331, GNorm = 0.1710, lr_0 = 1.2974e-04
Loss = 1.0575e-03, PNorm = 173.0346, GNorm = 0.0227, lr_0 = 1.2965e-04
Loss = 1.5735e-03, PNorm = 173.0354, GNorm = 0.4748, lr_0 = 1.2956e-04
Loss = 2.7986e-03, PNorm = 173.0365, GNorm = 0.0749, lr_0 = 1.2947e-04
Loss = 2.2633e-03, PNorm = 173.0376, GNorm = 0.1816, lr_0 = 1.2938e-04
Loss = 1.6105e-03, PNorm = 173.0380, GNorm = 0.0984, lr_0 = 1.2929e-04
Loss = 1.7697e-03, PNorm = 173.0399, GNorm = 0.0288, lr_0 = 1.2921e-04
Loss = 1.9792e-03, PNorm = 173.0413, GNorm = 0.0624, lr_0 = 1.2912e-04
Loss = 1.0648e-03, PNorm = 173.0443, GNorm = 0.1158, lr_0 = 1.2903e-04
Loss = 1.1735e-03, PNorm = 173.0485, GNorm = 0.0395, lr_0 = 1.2894e-04
Loss = 1.6760e-03, PNorm = 173.0498, GNorm = 0.1971, lr_0 = 1.2885e-04
Loss = 1.0577e-03, PNorm = 173.0498, GNorm = 0.1124, lr_0 = 1.2876e-04
Loss = 1.6214e-03, PNorm = 173.0516, GNorm = 0.0700, lr_0 = 1.2867e-04
Loss = 8.2197e-04, PNorm = 173.0535, GNorm = 0.0797, lr_0 = 1.2859e-04
Loss = 1.1815e-03, PNorm = 173.0565, GNorm = 0.1900, lr_0 = 1.2850e-04
Loss = 8.1412e-04, PNorm = 173.0593, GNorm = 0.0579, lr_0 = 1.2841e-04
Loss = 1.9743e-03, PNorm = 173.0614, GNorm = 0.0463, lr_0 = 1.2832e-04
Loss = 2.1648e-03, PNorm = 173.0637, GNorm = 0.0585, lr_0 = 1.2823e-04
Loss = 8.6764e-04, PNorm = 173.0666, GNorm = 0.1107, lr_0 = 1.2815e-04
Loss = 2.3524e-03, PNorm = 173.0686, GNorm = 0.1130, lr_0 = 1.2806e-04
Loss = 9.7224e-04, PNorm = 173.0703, GNorm = 0.0833, lr_0 = 1.2797e-04
Validation mae = 0.278035
Epoch 27
Loss = 7.4864e-04, PNorm = 173.0709, GNorm = 0.0273, lr_0 = 1.2788e-04
Loss = 8.3071e-04, PNorm = 173.0717, GNorm = 0.1257, lr_0 = 1.2780e-04
Loss = 9.5411e-04, PNorm = 173.0725, GNorm = 0.0365, lr_0 = 1.2771e-04
Loss = 1.6923e-03, PNorm = 173.0753, GNorm = 0.1102, lr_0 = 1.2762e-04
Loss = 1.6578e-03, PNorm = 173.0771, GNorm = 0.0604, lr_0 = 1.2753e-04
Loss = 1.2712e-03, PNorm = 173.0779, GNorm = 0.0540, lr_0 = 1.2745e-04
Loss = 1.3497e-03, PNorm = 173.0791, GNorm = 0.0769, lr_0 = 1.2736e-04
Loss = 7.2289e-04, PNorm = 173.0796, GNorm = 0.0479, lr_0 = 1.2727e-04
Loss = 1.3364e-03, PNorm = 173.0812, GNorm = 0.1180, lr_0 = 1.2718e-04
Loss = 3.5920e-03, PNorm = 173.0832, GNorm = 0.0481, lr_0 = 1.2710e-04
Loss = 1.6457e-03, PNorm = 173.0852, GNorm = 0.0947, lr_0 = 1.2701e-04
Loss = 7.3306e-04, PNorm = 173.0868, GNorm = 0.2043, lr_0 = 1.2692e-04
Loss = 1.6620e-03, PNorm = 173.0886, GNorm = 0.0925, lr_0 = 1.2684e-04
Loss = 9.2934e-04, PNorm = 173.0909, GNorm = 0.1249, lr_0 = 1.2675e-04
Loss = 1.9401e-03, PNorm = 173.0912, GNorm = 0.1178, lr_0 = 1.2666e-04
Loss = 3.8819e-03, PNorm = 173.0914, GNorm = 0.3426, lr_0 = 1.2658e-04
Loss = 9.2003e-04, PNorm = 173.0933, GNorm = 0.0837, lr_0 = 1.2649e-04
Loss = 1.3753e-03, PNorm = 173.0950, GNorm = 0.3339, lr_0 = 1.2640e-04
Loss = 1.5406e-03, PNorm = 173.0965, GNorm = 0.1895, lr_0 = 1.2632e-04
Loss = 1.2954e-03, PNorm = 173.0971, GNorm = 0.0480, lr_0 = 1.2623e-04
Loss = 1.7470e-03, PNorm = 173.0985, GNorm = 0.1308, lr_0 = 1.2614e-04
Loss = 1.3881e-03, PNorm = 173.1004, GNorm = 0.0284, lr_0 = 1.2606e-04
Loss = 1.7473e-03, PNorm = 173.1027, GNorm = 0.0740, lr_0 = 1.2597e-04
Loss = 2.8544e-03, PNorm = 173.1051, GNorm = 0.0789, lr_0 = 1.2588e-04
Loss = 1.4371e-03, PNorm = 173.1069, GNorm = 0.1322, lr_0 = 1.2580e-04
Loss = 4.0787e-03, PNorm = 173.1082, GNorm = 0.0889, lr_0 = 1.2571e-04
Loss = 1.7479e-03, PNorm = 173.1092, GNorm = 0.1237, lr_0 = 1.2563e-04
Loss = 1.2665e-03, PNorm = 173.1102, GNorm = 0.1755, lr_0 = 1.2554e-04
Loss = 1.1638e-03, PNorm = 173.1121, GNorm = 0.0778, lr_0 = 1.2545e-04
Loss = 1.3344e-03, PNorm = 173.1139, GNorm = 0.0677, lr_0 = 1.2537e-04
Loss = 1.0229e-03, PNorm = 173.1145, GNorm = 0.0853, lr_0 = 1.2528e-04
Loss = 1.0263e-03, PNorm = 173.1156, GNorm = 0.2007, lr_0 = 1.2520e-04
Loss = 1.0807e-03, PNorm = 173.1154, GNorm = 0.1188, lr_0 = 1.2511e-04
Loss = 6.3128e-04, PNorm = 173.1169, GNorm = 0.1076, lr_0 = 1.2502e-04
Loss = 1.4250e-03, PNorm = 173.1189, GNorm = 0.3536, lr_0 = 1.2494e-04
Loss = 3.0080e-03, PNorm = 173.1210, GNorm = 0.0463, lr_0 = 1.2485e-04
Loss = 3.4984e-03, PNorm = 173.1226, GNorm = 0.1790, lr_0 = 1.2477e-04
Loss = 3.4440e-03, PNorm = 173.1227, GNorm = 0.0656, lr_0 = 1.2468e-04
Loss = 7.0217e-04, PNorm = 173.1240, GNorm = 0.1086, lr_0 = 1.2460e-04
Loss = 1.4169e-03, PNorm = 173.1258, GNorm = 0.0620, lr_0 = 1.2451e-04
Loss = 1.2718e-03, PNorm = 173.1273, GNorm = 0.0777, lr_0 = 1.2443e-04
Loss = 7.2802e-04, PNorm = 173.1285, GNorm = 0.1020, lr_0 = 1.2434e-04
Loss = 2.9389e-03, PNorm = 173.1300, GNorm = 0.0689, lr_0 = 1.2426e-04
Loss = 1.0666e-03, PNorm = 173.1315, GNorm = 0.0660, lr_0 = 1.2417e-04
Loss = 1.6643e-03, PNorm = 173.1330, GNorm = 0.0505, lr_0 = 1.2409e-04
Loss = 1.2537e-03, PNorm = 173.1349, GNorm = 0.0628, lr_0 = 1.2400e-04
Loss = 7.1707e-04, PNorm = 173.1372, GNorm = 0.1036, lr_0 = 1.2392e-04
Loss = 7.2002e-04, PNorm = 173.1398, GNorm = 0.1079, lr_0 = 1.2383e-04
Loss = 7.9562e-04, PNorm = 173.1426, GNorm = 0.1608, lr_0 = 1.2375e-04
Loss = 1.7705e-03, PNorm = 173.1446, GNorm = 0.0818, lr_0 = 1.2366e-04
Loss = 1.4858e-03, PNorm = 173.1464, GNorm = 0.0712, lr_0 = 1.2358e-04
Loss = 9.4071e-04, PNorm = 173.1477, GNorm = 0.0521, lr_0 = 1.2349e-04
Loss = 1.6081e-03, PNorm = 173.1495, GNorm = 0.3630, lr_0 = 1.2341e-04
Loss = 1.0447e-03, PNorm = 173.1507, GNorm = 0.1610, lr_0 = 1.2332e-04
Loss = 9.9549e-04, PNorm = 173.1515, GNorm = 0.2031, lr_0 = 1.2324e-04
Loss = 8.8140e-04, PNorm = 173.1527, GNorm = 0.1022, lr_0 = 1.2315e-04
Loss = 6.4167e-04, PNorm = 173.1542, GNorm = 0.0429, lr_0 = 1.2307e-04
Loss = 8.3784e-04, PNorm = 173.1558, GNorm = 0.1565, lr_0 = 1.2298e-04
Loss = 1.2183e-03, PNorm = 173.1563, GNorm = 0.0844, lr_0 = 1.2290e-04
Loss = 7.3825e-04, PNorm = 173.1583, GNorm = 0.0654, lr_0 = 1.2282e-04
Loss = 1.2320e-03, PNorm = 173.1595, GNorm = 0.0571, lr_0 = 1.2273e-04
Loss = 1.5181e-03, PNorm = 173.1602, GNorm = 0.1387, lr_0 = 1.2265e-04
Loss = 1.7346e-03, PNorm = 173.1608, GNorm = 0.0724, lr_0 = 1.2256e-04
Loss = 2.0108e-03, PNorm = 173.1616, GNorm = 0.1735, lr_0 = 1.2248e-04
Loss = 1.1979e-03, PNorm = 173.1635, GNorm = 0.1593, lr_0 = 1.2240e-04
Loss = 7.8422e-04, PNorm = 173.1646, GNorm = 0.1634, lr_0 = 1.2231e-04
Loss = 1.8090e-03, PNorm = 173.1671, GNorm = 0.0949, lr_0 = 1.2223e-04
Loss = 9.8439e-04, PNorm = 173.1687, GNorm = 0.0991, lr_0 = 1.2214e-04
Loss = 1.5453e-03, PNorm = 173.1700, GNorm = 0.0440, lr_0 = 1.2206e-04
Loss = 7.9648e-04, PNorm = 173.1728, GNorm = 0.0587, lr_0 = 1.2198e-04
Loss = 1.7255e-03, PNorm = 173.1741, GNorm = 0.0885, lr_0 = 1.2189e-04
Loss = 8.7473e-04, PNorm = 173.1754, GNorm = 0.0668, lr_0 = 1.2181e-04
Loss = 7.7139e-04, PNorm = 173.1766, GNorm = 0.1626, lr_0 = 1.2173e-04
Loss = 7.1261e-04, PNorm = 173.1778, GNorm = 0.1115, lr_0 = 1.2164e-04
Loss = 6.8629e-04, PNorm = 173.1792, GNorm = 0.0486, lr_0 = 1.2156e-04
Loss = 1.0719e-03, PNorm = 173.1811, GNorm = 0.0913, lr_0 = 1.2148e-04
Loss = 8.8298e-04, PNorm = 173.1819, GNorm = 0.0769, lr_0 = 1.2139e-04
Loss = 1.1326e-03, PNorm = 173.1823, GNorm = 0.1127, lr_0 = 1.2131e-04
Loss = 1.5549e-03, PNorm = 173.1839, GNorm = 0.1325, lr_0 = 1.2123e-04
Loss = 7.1568e-04, PNorm = 173.1864, GNorm = 0.1198, lr_0 = 1.2114e-04
Loss = 1.6643e-03, PNorm = 173.1904, GNorm = 0.0805, lr_0 = 1.2106e-04
Loss = 8.0124e-04, PNorm = 173.1915, GNorm = 0.1406, lr_0 = 1.2098e-04
Loss = 7.3927e-04, PNorm = 173.1929, GNorm = 0.1139, lr_0 = 1.2090e-04
Loss = 8.6933e-04, PNorm = 173.1947, GNorm = 0.0577, lr_0 = 1.2081e-04
Loss = 1.8225e-03, PNorm = 173.1960, GNorm = 0.1068, lr_0 = 1.2073e-04
Loss = 1.2094e-03, PNorm = 173.1971, GNorm = 0.0350, lr_0 = 1.2065e-04
Loss = 5.8481e-04, PNorm = 173.1988, GNorm = 0.0257, lr_0 = 1.2056e-04
Loss = 1.9193e-03, PNorm = 173.2014, GNorm = 0.1441, lr_0 = 1.2048e-04
Loss = 9.3367e-04, PNorm = 173.2037, GNorm = 0.0446, lr_0 = 1.2040e-04
Loss = 9.4455e-04, PNorm = 173.2058, GNorm = 0.0999, lr_0 = 1.2032e-04
Loss = 1.6951e-03, PNorm = 173.2062, GNorm = 0.0826, lr_0 = 1.2023e-04
Loss = 7.4738e-04, PNorm = 173.2080, GNorm = 0.0626, lr_0 = 1.2015e-04
Loss = 6.9941e-04, PNorm = 173.2096, GNorm = 0.0669, lr_0 = 1.2007e-04
Loss = 1.5410e-03, PNorm = 173.2127, GNorm = 0.1655, lr_0 = 1.1999e-04
Loss = 1.0331e-03, PNorm = 173.2152, GNorm = 0.0830, lr_0 = 1.1991e-04
Loss = 1.1887e-03, PNorm = 173.2159, GNorm = 0.0786, lr_0 = 1.1982e-04
Loss = 1.2831e-03, PNorm = 173.2161, GNorm = 0.0636, lr_0 = 1.1974e-04
Loss = 6.7516e-04, PNorm = 173.2172, GNorm = 0.0563, lr_0 = 1.1966e-04
Loss = 1.9378e-03, PNorm = 173.2185, GNorm = 0.0497, lr_0 = 1.1958e-04
Loss = 1.2754e-03, PNorm = 173.2200, GNorm = 0.0523, lr_0 = 1.1950e-04
Loss = 1.1419e-03, PNorm = 173.2230, GNorm = 0.2513, lr_0 = 1.1941e-04
Loss = 2.4155e-03, PNorm = 173.2255, GNorm = 0.1286, lr_0 = 1.1933e-04
Loss = 1.9106e-03, PNorm = 173.2269, GNorm = 0.0720, lr_0 = 1.1925e-04
Loss = 1.2010e-03, PNorm = 173.2277, GNorm = 0.0531, lr_0 = 1.1917e-04
Loss = 1.3159e-03, PNorm = 173.2296, GNorm = 0.0827, lr_0 = 1.1909e-04
Loss = 2.9410e-03, PNorm = 173.2321, GNorm = 0.5223, lr_0 = 1.1901e-04
Loss = 7.4654e-04, PNorm = 173.2343, GNorm = 0.0895, lr_0 = 1.1892e-04
Loss = 9.3070e-04, PNorm = 173.2378, GNorm = 0.0241, lr_0 = 1.1884e-04
Loss = 1.9960e-03, PNorm = 173.2399, GNorm = 0.0870, lr_0 = 1.1876e-04
Loss = 1.1802e-03, PNorm = 173.2400, GNorm = 0.0952, lr_0 = 1.1868e-04
Loss = 1.0008e-03, PNorm = 173.2402, GNorm = 0.1180, lr_0 = 1.1860e-04
Loss = 1.1636e-03, PNorm = 173.2412, GNorm = 0.1066, lr_0 = 1.1852e-04
Loss = 2.7800e-03, PNorm = 173.2430, GNorm = 0.0821, lr_0 = 1.1844e-04
Loss = 2.6404e-03, PNorm = 173.2442, GNorm = 0.0881, lr_0 = 1.1835e-04
Loss = 2.4710e-03, PNorm = 173.2461, GNorm = 0.0918, lr_0 = 1.1827e-04
Loss = 9.3737e-04, PNorm = 173.2478, GNorm = 0.2043, lr_0 = 1.1819e-04
Loss = 1.9475e-03, PNorm = 173.2504, GNorm = 0.0493, lr_0 = 1.1811e-04
Loss = 1.0121e-03, PNorm = 173.2534, GNorm = 0.2540, lr_0 = 1.1803e-04
Loss = 2.6687e-03, PNorm = 173.2552, GNorm = 0.1672, lr_0 = 1.1795e-04
Loss = 8.2407e-04, PNorm = 173.2560, GNorm = 0.0530, lr_0 = 1.1787e-04
Validation mae = 0.277741
Epoch 28
Loss = 9.4049e-04, PNorm = 173.2569, GNorm = 0.3379, lr_0 = 1.1779e-04
Loss = 1.7169e-03, PNorm = 173.2580, GNorm = 0.0716, lr_0 = 1.1771e-04
Loss = 1.9250e-03, PNorm = 173.2591, GNorm = 0.0366, lr_0 = 1.1763e-04
Loss = 1.5873e-03, PNorm = 173.2592, GNorm = 0.1068, lr_0 = 1.1755e-04
Loss = 1.8865e-03, PNorm = 173.2601, GNorm = 0.1293, lr_0 = 1.1747e-04
Loss = 6.1933e-04, PNorm = 173.2616, GNorm = 0.0340, lr_0 = 1.1739e-04
Loss = 1.5199e-03, PNorm = 173.2636, GNorm = 0.1129, lr_0 = 1.1730e-04
Loss = 1.2272e-03, PNorm = 173.2639, GNorm = 0.0605, lr_0 = 1.1722e-04
Loss = 9.5532e-04, PNorm = 173.2652, GNorm = 0.4556, lr_0 = 1.1714e-04
Loss = 1.2164e-03, PNorm = 173.2657, GNorm = 0.1605, lr_0 = 1.1706e-04
Loss = 2.8312e-03, PNorm = 173.2671, GNorm = 0.0421, lr_0 = 1.1698e-04
Loss = 2.0796e-03, PNorm = 173.2696, GNorm = 0.0994, lr_0 = 1.1690e-04
Loss = 6.0717e-04, PNorm = 173.2713, GNorm = 0.0444, lr_0 = 1.1682e-04
Loss = 5.5796e-04, PNorm = 173.2728, GNorm = 0.0929, lr_0 = 1.1674e-04
Loss = 1.1054e-03, PNorm = 173.2746, GNorm = 0.0581, lr_0 = 1.1666e-04
Loss = 6.4930e-04, PNorm = 173.2757, GNorm = 0.0835, lr_0 = 1.1658e-04
Loss = 3.1712e-03, PNorm = 173.2771, GNorm = 0.0729, lr_0 = 1.1650e-04
Loss = 7.7230e-04, PNorm = 173.2794, GNorm = 0.0476, lr_0 = 1.1642e-04
Loss = 9.6644e-04, PNorm = 173.2811, GNorm = 0.1185, lr_0 = 1.1634e-04
Loss = 1.1081e-03, PNorm = 173.2818, GNorm = 0.1391, lr_0 = 1.1626e-04
Loss = 1.1970e-03, PNorm = 173.2846, GNorm = 0.1324, lr_0 = 1.1618e-04
Loss = 6.1017e-04, PNorm = 173.2857, GNorm = 0.0425, lr_0 = 1.1611e-04
Loss = 2.6989e-03, PNorm = 173.2862, GNorm = 0.0740, lr_0 = 1.1603e-04
Loss = 7.8511e-04, PNorm = 173.2865, GNorm = 0.1503, lr_0 = 1.1595e-04
Loss = 8.0125e-04, PNorm = 173.2861, GNorm = 0.0446, lr_0 = 1.1587e-04
Loss = 1.1272e-03, PNorm = 173.2860, GNorm = 0.0410, lr_0 = 1.1579e-04
Loss = 7.7907e-04, PNorm = 173.2873, GNorm = 0.0575, lr_0 = 1.1571e-04
Loss = 9.0881e-04, PNorm = 173.2896, GNorm = 0.0678, lr_0 = 1.1563e-04
Loss = 1.4634e-03, PNorm = 173.2923, GNorm = 0.1069, lr_0 = 1.1555e-04
Loss = 9.8113e-04, PNorm = 173.2944, GNorm = 0.1113, lr_0 = 1.1547e-04
Loss = 6.8476e-04, PNorm = 173.2955, GNorm = 0.0431, lr_0 = 1.1539e-04
Loss = 6.5586e-04, PNorm = 173.2969, GNorm = 0.0487, lr_0 = 1.1531e-04
Loss = 5.4210e-04, PNorm = 173.2975, GNorm = 0.0511, lr_0 = 1.1523e-04
Loss = 8.8554e-04, PNorm = 173.2984, GNorm = 0.0574, lr_0 = 1.1515e-04
Loss = 8.1008e-04, PNorm = 173.2993, GNorm = 0.0259, lr_0 = 1.1508e-04
Loss = 7.8019e-04, PNorm = 173.3010, GNorm = 0.1122, lr_0 = 1.1500e-04
Loss = 7.4000e-04, PNorm = 173.3033, GNorm = 0.0538, lr_0 = 1.1492e-04
Loss = 8.6937e-04, PNorm = 173.3049, GNorm = 0.0508, lr_0 = 1.1484e-04
Loss = 6.1171e-04, PNorm = 173.3064, GNorm = 0.0353, lr_0 = 1.1476e-04
Loss = 1.6310e-03, PNorm = 173.3066, GNorm = 0.0650, lr_0 = 1.1468e-04
Loss = 5.6908e-04, PNorm = 173.3071, GNorm = 0.0689, lr_0 = 1.1460e-04
Loss = 1.0045e-03, PNorm = 173.3072, GNorm = 0.1204, lr_0 = 1.1452e-04
Loss = 7.7154e-04, PNorm = 173.3077, GNorm = 0.0788, lr_0 = 1.1445e-04
Loss = 6.1338e-04, PNorm = 173.3092, GNorm = 0.0460, lr_0 = 1.1437e-04
Loss = 5.1202e-04, PNorm = 173.3109, GNorm = 0.0886, lr_0 = 1.1429e-04
Loss = 7.6004e-04, PNorm = 173.3115, GNorm = 0.0747, lr_0 = 1.1421e-04
Loss = 1.4697e-03, PNorm = 173.3122, GNorm = 0.0421, lr_0 = 1.1413e-04
Loss = 2.5291e-03, PNorm = 173.3136, GNorm = 0.0343, lr_0 = 1.1405e-04
Loss = 7.9925e-04, PNorm = 173.3136, GNorm = 0.1305, lr_0 = 1.1398e-04
Loss = 1.2627e-03, PNorm = 173.3138, GNorm = 0.0872, lr_0 = 1.1390e-04
Loss = 9.0499e-04, PNorm = 173.3152, GNorm = 0.1938, lr_0 = 1.1382e-04
Loss = 5.0445e-03, PNorm = 173.3172, GNorm = 0.0634, lr_0 = 1.1374e-04
Loss = 1.4534e-03, PNorm = 173.3188, GNorm = 0.0338, lr_0 = 1.1366e-04
Loss = 9.2761e-04, PNorm = 173.3200, GNorm = 0.1095, lr_0 = 1.1359e-04
Loss = 6.6918e-04, PNorm = 173.3221, GNorm = 0.0470, lr_0 = 1.1351e-04
Loss = 1.5909e-03, PNorm = 173.3240, GNorm = 0.1122, lr_0 = 1.1343e-04
Loss = 1.8721e-03, PNorm = 173.3260, GNorm = 0.0990, lr_0 = 1.1335e-04
Loss = 5.6621e-04, PNorm = 173.3265, GNorm = 0.0662, lr_0 = 1.1328e-04
Loss = 1.6956e-03, PNorm = 173.3276, GNorm = 0.0962, lr_0 = 1.1320e-04
Loss = 2.4350e-03, PNorm = 173.3297, GNorm = 0.0450, lr_0 = 1.1312e-04
Loss = 5.2222e-04, PNorm = 173.3314, GNorm = 0.1047, lr_0 = 1.1304e-04
Loss = 2.6995e-03, PNorm = 173.3326, GNorm = 0.0875, lr_0 = 1.1297e-04
Loss = 5.7778e-04, PNorm = 173.3321, GNorm = 0.0769, lr_0 = 1.1289e-04
Loss = 1.8269e-03, PNorm = 173.3342, GNorm = 0.0394, lr_0 = 1.1281e-04
Loss = 6.4619e-04, PNorm = 173.3356, GNorm = 0.0372, lr_0 = 1.1273e-04
Loss = 8.0341e-04, PNorm = 173.3369, GNorm = 0.0598, lr_0 = 1.1266e-04
Loss = 1.9599e-03, PNorm = 173.3377, GNorm = 0.0309, lr_0 = 1.1258e-04
Loss = 1.3916e-03, PNorm = 173.3385, GNorm = 0.0338, lr_0 = 1.1250e-04
Loss = 8.8105e-04, PNorm = 173.3399, GNorm = 0.0405, lr_0 = 1.1243e-04
Loss = 7.1419e-04, PNorm = 173.3414, GNorm = 0.0478, lr_0 = 1.1235e-04
Loss = 9.0955e-04, PNorm = 173.3426, GNorm = 0.0554, lr_0 = 1.1227e-04
Loss = 2.0174e-03, PNorm = 173.3448, GNorm = 0.1190, lr_0 = 1.1219e-04
Loss = 1.6891e-03, PNorm = 173.3460, GNorm = 0.0319, lr_0 = 1.1212e-04
Loss = 1.3912e-03, PNorm = 173.3480, GNorm = 0.1904, lr_0 = 1.1204e-04
Loss = 6.0160e-04, PNorm = 173.3498, GNorm = 0.0808, lr_0 = 1.1196e-04
Loss = 6.6705e-04, PNorm = 173.3497, GNorm = 0.0642, lr_0 = 1.1189e-04
Loss = 1.3871e-03, PNorm = 173.3510, GNorm = 0.1058, lr_0 = 1.1181e-04
Loss = 1.8346e-03, PNorm = 173.3536, GNorm = 0.0322, lr_0 = 1.1173e-04
Loss = 8.5224e-04, PNorm = 173.3552, GNorm = 0.1654, lr_0 = 1.1166e-04
Loss = 1.0711e-03, PNorm = 173.3553, GNorm = 0.1179, lr_0 = 1.1158e-04
Loss = 7.7992e-04, PNorm = 173.3558, GNorm = 0.0934, lr_0 = 1.1150e-04
Loss = 1.8789e-03, PNorm = 173.3567, GNorm = 0.0393, lr_0 = 1.1143e-04
Loss = 1.1527e-03, PNorm = 173.3585, GNorm = 0.0442, lr_0 = 1.1135e-04
Loss = 1.3070e-03, PNorm = 173.3602, GNorm = 0.0287, lr_0 = 1.1128e-04
Loss = 1.4583e-03, PNorm = 173.3613, GNorm = 0.0491, lr_0 = 1.1120e-04
Loss = 7.7020e-04, PNorm = 173.3625, GNorm = 0.0850, lr_0 = 1.1112e-04
Loss = 1.5697e-03, PNorm = 173.3631, GNorm = 0.0925, lr_0 = 1.1105e-04
Loss = 1.7367e-03, PNorm = 173.3651, GNorm = 0.1764, lr_0 = 1.1097e-04
Loss = 1.5322e-03, PNorm = 173.3668, GNorm = 0.0672, lr_0 = 1.1089e-04
Loss = 1.2852e-03, PNorm = 173.3688, GNorm = 0.0276, lr_0 = 1.1082e-04
Loss = 1.0779e-03, PNorm = 173.3704, GNorm = 0.0924, lr_0 = 1.1074e-04
Loss = 1.2606e-03, PNorm = 173.3732, GNorm = 0.0490, lr_0 = 1.1067e-04
Loss = 8.1846e-04, PNorm = 173.3757, GNorm = 0.1006, lr_0 = 1.1059e-04
Loss = 1.1765e-03, PNorm = 173.3770, GNorm = 0.1242, lr_0 = 1.1052e-04
Loss = 2.1856e-03, PNorm = 173.3789, GNorm = 0.0562, lr_0 = 1.1044e-04
Loss = 1.4324e-03, PNorm = 173.3802, GNorm = 0.1950, lr_0 = 1.1036e-04
Loss = 1.2751e-03, PNorm = 173.3815, GNorm = 0.0344, lr_0 = 1.1029e-04
Loss = 1.6725e-03, PNorm = 173.3826, GNorm = 0.1533, lr_0 = 1.1021e-04
Loss = 9.8522e-04, PNorm = 173.3838, GNorm = 0.1083, lr_0 = 1.1014e-04
Loss = 1.9510e-03, PNorm = 173.3855, GNorm = 0.0980, lr_0 = 1.1006e-04
Loss = 6.7262e-04, PNorm = 173.3870, GNorm = 0.0789, lr_0 = 1.0999e-04
Loss = 1.8236e-03, PNorm = 173.3877, GNorm = 0.0530, lr_0 = 1.0991e-04
Loss = 6.2339e-04, PNorm = 173.3886, GNorm = 0.0597, lr_0 = 1.0984e-04
Loss = 1.0935e-03, PNorm = 173.3907, GNorm = 0.0732, lr_0 = 1.0976e-04
Loss = 6.8366e-04, PNorm = 173.3926, GNorm = 0.0815, lr_0 = 1.0969e-04
Loss = 7.1698e-04, PNorm = 173.3948, GNorm = 0.0815, lr_0 = 1.0961e-04
Loss = 2.1307e-03, PNorm = 173.3963, GNorm = 0.0651, lr_0 = 1.0954e-04
Loss = 1.7703e-03, PNorm = 173.3971, GNorm = 0.0385, lr_0 = 1.0946e-04
Loss = 2.7532e-03, PNorm = 173.3976, GNorm = 0.0266, lr_0 = 1.0939e-04
Loss = 6.8040e-04, PNorm = 173.3988, GNorm = 0.0747, lr_0 = 1.0931e-04
Loss = 2.7024e-03, PNorm = 173.4016, GNorm = 0.0555, lr_0 = 1.0924e-04
Loss = 1.1888e-03, PNorm = 173.4036, GNorm = 0.1470, lr_0 = 1.0916e-04
Loss = 7.3389e-04, PNorm = 173.4058, GNorm = 0.0488, lr_0 = 1.0909e-04
Loss = 2.2367e-03, PNorm = 173.4071, GNorm = 0.1182, lr_0 = 1.0901e-04
Loss = 2.5456e-03, PNorm = 173.4082, GNorm = 0.4306, lr_0 = 1.0894e-04
Loss = 1.0629e-03, PNorm = 173.4102, GNorm = 0.0650, lr_0 = 1.0886e-04
Loss = 8.9611e-04, PNorm = 173.4115, GNorm = 0.2370, lr_0 = 1.0879e-04
Loss = 1.5274e-03, PNorm = 173.4122, GNorm = 0.0937, lr_0 = 1.0871e-04
Loss = 1.6628e-03, PNorm = 173.4126, GNorm = 0.2365, lr_0 = 1.0864e-04
Loss = 1.3320e-03, PNorm = 173.4138, GNorm = 0.0902, lr_0 = 1.0856e-04
Validation mae = 0.277622
Epoch 29
Loss = 6.6183e-04, PNorm = 173.4143, GNorm = 0.0445, lr_0 = 1.0849e-04
Loss = 1.9648e-03, PNorm = 173.4147, GNorm = 0.0487, lr_0 = 1.0841e-04
Loss = 1.5664e-03, PNorm = 173.4154, GNorm = 0.0620, lr_0 = 1.0834e-04
Loss = 1.0257e-03, PNorm = 173.4174, GNorm = 0.0319, lr_0 = 1.0827e-04
Loss = 1.4205e-03, PNorm = 173.4186, GNorm = 0.1132, lr_0 = 1.0819e-04
Loss = 1.4555e-03, PNorm = 173.4195, GNorm = 0.0578, lr_0 = 1.0812e-04
Loss = 4.8651e-04, PNorm = 173.4200, GNorm = 0.0427, lr_0 = 1.0804e-04
Loss = 6.0891e-04, PNorm = 173.4206, GNorm = 0.0387, lr_0 = 1.0797e-04
Loss = 7.4929e-04, PNorm = 173.4213, GNorm = 0.1397, lr_0 = 1.0790e-04
Loss = 7.7977e-04, PNorm = 173.4225, GNorm = 0.0370, lr_0 = 1.0782e-04
Loss = 7.9165e-04, PNorm = 173.4238, GNorm = 0.0339, lr_0 = 1.0775e-04
Loss = 8.0581e-04, PNorm = 173.4247, GNorm = 0.3123, lr_0 = 1.0767e-04
Loss = 1.0453e-03, PNorm = 173.4268, GNorm = 0.0935, lr_0 = 1.0760e-04
Loss = 9.7282e-04, PNorm = 173.4276, GNorm = 0.2418, lr_0 = 1.0753e-04
Loss = 7.2416e-04, PNorm = 173.4287, GNorm = 0.1012, lr_0 = 1.0745e-04
Loss = 5.6478e-04, PNorm = 173.4288, GNorm = 0.0984, lr_0 = 1.0738e-04
Loss = 1.1953e-03, PNorm = 173.4294, GNorm = 0.1480, lr_0 = 1.0731e-04
Loss = 4.3464e-04, PNorm = 173.4306, GNorm = 0.0410, lr_0 = 1.0723e-04
Loss = 7.0243e-04, PNorm = 173.4317, GNorm = 0.0425, lr_0 = 1.0716e-04
Loss = 4.3904e-04, PNorm = 173.4323, GNorm = 0.0329, lr_0 = 1.0709e-04
Loss = 1.3902e-03, PNorm = 173.4327, GNorm = 0.0465, lr_0 = 1.0701e-04
Loss = 1.0038e-03, PNorm = 173.4331, GNorm = 0.1061, lr_0 = 1.0694e-04
Loss = 1.1191e-03, PNorm = 173.4349, GNorm = 0.0617, lr_0 = 1.0687e-04
Loss = 8.2022e-04, PNorm = 173.4357, GNorm = 0.0275, lr_0 = 1.0679e-04
Loss = 1.9950e-03, PNorm = 173.4369, GNorm = 0.1173, lr_0 = 1.0672e-04
Loss = 6.4978e-04, PNorm = 173.4387, GNorm = 0.0971, lr_0 = 1.0665e-04
Loss = 5.4056e-04, PNorm = 173.4396, GNorm = 0.0355, lr_0 = 1.0657e-04
Loss = 8.3456e-04, PNorm = 173.4401, GNorm = 0.2236, lr_0 = 1.0650e-04
Loss = 1.1573e-03, PNorm = 173.4419, GNorm = 0.1223, lr_0 = 1.0643e-04
Loss = 1.0730e-03, PNorm = 173.4420, GNorm = 0.0500, lr_0 = 1.0635e-04
Loss = 8.7689e-04, PNorm = 173.4423, GNorm = 0.0689, lr_0 = 1.0628e-04
Loss = 5.5928e-04, PNorm = 173.4422, GNorm = 0.1066, lr_0 = 1.0621e-04
Loss = 5.6045e-04, PNorm = 173.4443, GNorm = 0.0508, lr_0 = 1.0614e-04
Loss = 5.5740e-04, PNorm = 173.4460, GNorm = 0.1536, lr_0 = 1.0606e-04
Loss = 9.4720e-04, PNorm = 173.4471, GNorm = 0.0626, lr_0 = 1.0599e-04
Loss = 5.9022e-04, PNorm = 173.4486, GNorm = 0.0415, lr_0 = 1.0592e-04
Loss = 1.2183e-03, PNorm = 173.4495, GNorm = 0.1596, lr_0 = 1.0585e-04
Loss = 1.8604e-03, PNorm = 173.4505, GNorm = 0.0342, lr_0 = 1.0577e-04
Loss = 2.1480e-03, PNorm = 173.4523, GNorm = 0.1091, lr_0 = 1.0570e-04
Loss = 1.7874e-03, PNorm = 173.4532, GNorm = 0.0515, lr_0 = 1.0563e-04
Loss = 8.1248e-04, PNorm = 173.4544, GNorm = 0.0375, lr_0 = 1.0556e-04
Loss = 5.5283e-04, PNorm = 173.4557, GNorm = 0.0544, lr_0 = 1.0548e-04
Loss = 1.0227e-03, PNorm = 173.4572, GNorm = 0.0975, lr_0 = 1.0541e-04
Loss = 1.3723e-03, PNorm = 173.4583, GNorm = 0.0447, lr_0 = 1.0534e-04
Loss = 1.6028e-03, PNorm = 173.4603, GNorm = 0.0454, lr_0 = 1.0527e-04
Loss = 6.3757e-04, PNorm = 173.4627, GNorm = 0.0434, lr_0 = 1.0519e-04
Loss = 7.3886e-04, PNorm = 173.4645, GNorm = 0.0875, lr_0 = 1.0512e-04
Loss = 8.9074e-04, PNorm = 173.4647, GNorm = 0.0426, lr_0 = 1.0505e-04
Loss = 5.6247e-04, PNorm = 173.4651, GNorm = 0.1189, lr_0 = 1.0498e-04
Loss = 1.1968e-03, PNorm = 173.4658, GNorm = 0.1080, lr_0 = 1.0491e-04
Loss = 8.0953e-04, PNorm = 173.4668, GNorm = 0.0566, lr_0 = 1.0483e-04
Loss = 4.7785e-04, PNorm = 173.4688, GNorm = 0.0643, lr_0 = 1.0476e-04
Loss = 7.2766e-04, PNorm = 173.4704, GNorm = 0.1100, lr_0 = 1.0469e-04
Loss = 7.9311e-04, PNorm = 173.4724, GNorm = 0.0735, lr_0 = 1.0462e-04
Loss = 9.1032e-04, PNorm = 173.4744, GNorm = 0.0891, lr_0 = 1.0455e-04
Loss = 1.1383e-03, PNorm = 173.4749, GNorm = 0.0709, lr_0 = 1.0448e-04
Loss = 1.0458e-03, PNorm = 173.4759, GNorm = 0.1221, lr_0 = 1.0440e-04
Loss = 9.8128e-04, PNorm = 173.4778, GNorm = 0.0638, lr_0 = 1.0433e-04
Loss = 6.1311e-04, PNorm = 173.4786, GNorm = 0.0753, lr_0 = 1.0426e-04
Loss = 9.6144e-04, PNorm = 173.4790, GNorm = 0.0641, lr_0 = 1.0419e-04
Loss = 6.8446e-04, PNorm = 173.4801, GNorm = 0.0225, lr_0 = 1.0412e-04
Loss = 1.2946e-03, PNorm = 173.4812, GNorm = 0.0797, lr_0 = 1.0405e-04
Loss = 1.1522e-03, PNorm = 173.4827, GNorm = 0.2369, lr_0 = 1.0398e-04
Loss = 3.4045e-03, PNorm = 173.4835, GNorm = 0.0244, lr_0 = 1.0391e-04
Loss = 2.9558e-03, PNorm = 173.4844, GNorm = 0.0636, lr_0 = 1.0383e-04
Loss = 1.2673e-03, PNorm = 173.4856, GNorm = 0.0299, lr_0 = 1.0376e-04
Loss = 1.2725e-03, PNorm = 173.4863, GNorm = 0.0359, lr_0 = 1.0369e-04
Loss = 1.4506e-03, PNorm = 173.4867, GNorm = 0.1839, lr_0 = 1.0362e-04
Loss = 4.8493e-04, PNorm = 173.4874, GNorm = 0.0985, lr_0 = 1.0355e-04
Loss = 2.6062e-03, PNorm = 173.4886, GNorm = 0.2331, lr_0 = 1.0348e-04
Loss = 6.4616e-04, PNorm = 173.4896, GNorm = 0.0677, lr_0 = 1.0341e-04
Loss = 7.2381e-04, PNorm = 173.4908, GNorm = 0.0831, lr_0 = 1.0334e-04
Loss = 1.5070e-03, PNorm = 173.4916, GNorm = 0.1108, lr_0 = 1.0327e-04
Loss = 1.0453e-03, PNorm = 173.4938, GNorm = 0.0837, lr_0 = 1.0320e-04
Loss = 2.3768e-03, PNorm = 173.4961, GNorm = 0.0379, lr_0 = 1.0312e-04
Loss = 1.9339e-03, PNorm = 173.4963, GNorm = 0.1176, lr_0 = 1.0305e-04
Loss = 5.5434e-04, PNorm = 173.4976, GNorm = 0.0916, lr_0 = 1.0298e-04
Loss = 8.3140e-04, PNorm = 173.4987, GNorm = 0.0486, lr_0 = 1.0291e-04
Loss = 6.7379e-04, PNorm = 173.5000, GNorm = 0.1021, lr_0 = 1.0284e-04
Loss = 8.8779e-04, PNorm = 173.5007, GNorm = 0.1213, lr_0 = 1.0277e-04
Loss = 2.3169e-03, PNorm = 173.5029, GNorm = 0.0847, lr_0 = 1.0270e-04
Loss = 4.4022e-04, PNorm = 173.5048, GNorm = 0.0358, lr_0 = 1.0263e-04
Loss = 7.9293e-04, PNorm = 173.5061, GNorm = 0.1200, lr_0 = 1.0256e-04
Loss = 1.5385e-03, PNorm = 173.5058, GNorm = 0.0687, lr_0 = 1.0249e-04
Loss = 2.2450e-03, PNorm = 173.5073, GNorm = 0.5449, lr_0 = 1.0242e-04
Loss = 1.4078e-03, PNorm = 173.5085, GNorm = 0.1788, lr_0 = 1.0235e-04
Loss = 2.5156e-03, PNorm = 173.5093, GNorm = 0.0894, lr_0 = 1.0228e-04
Loss = 5.6323e-04, PNorm = 173.5098, GNorm = 0.1178, lr_0 = 1.0221e-04
Loss = 6.0241e-04, PNorm = 173.5116, GNorm = 0.0804, lr_0 = 1.0214e-04
Loss = 5.9073e-04, PNorm = 173.5127, GNorm = 0.0237, lr_0 = 1.0207e-04
Loss = 1.1913e-03, PNorm = 173.5136, GNorm = 0.0320, lr_0 = 1.0200e-04
Loss = 6.5784e-04, PNorm = 173.5155, GNorm = 0.0763, lr_0 = 1.0193e-04
Loss = 2.3694e-03, PNorm = 173.5164, GNorm = 0.2055, lr_0 = 1.0186e-04
Loss = 1.4088e-03, PNorm = 173.5169, GNorm = 0.1333, lr_0 = 1.0179e-04
Loss = 1.0961e-03, PNorm = 173.5181, GNorm = 0.0405, lr_0 = 1.0172e-04
Loss = 1.0883e-03, PNorm = 173.5178, GNorm = 0.1553, lr_0 = 1.0165e-04
Loss = 3.7405e-03, PNorm = 173.5189, GNorm = 0.0446, lr_0 = 1.0158e-04
Loss = 6.0031e-04, PNorm = 173.5198, GNorm = 0.0386, lr_0 = 1.0151e-04
Loss = 5.9577e-04, PNorm = 173.5204, GNorm = 0.0597, lr_0 = 1.0144e-04
Loss = 8.7692e-04, PNorm = 173.5207, GNorm = 0.0636, lr_0 = 1.0137e-04
Loss = 1.6421e-03, PNorm = 173.5210, GNorm = 0.0568, lr_0 = 1.0130e-04
Loss = 2.0382e-03, PNorm = 173.5223, GNorm = 0.0382, lr_0 = 1.0123e-04
Loss = 1.6114e-03, PNorm = 173.5241, GNorm = 0.1091, lr_0 = 1.0116e-04
Loss = 1.0745e-03, PNorm = 173.5265, GNorm = 0.0993, lr_0 = 1.0110e-04
Loss = 9.0826e-04, PNorm = 173.5288, GNorm = 0.0628, lr_0 = 1.0103e-04
Loss = 1.6208e-03, PNorm = 173.5302, GNorm = 0.0772, lr_0 = 1.0096e-04
Loss = 1.3933e-03, PNorm = 173.5316, GNorm = 0.0750, lr_0 = 1.0089e-04
Loss = 1.8511e-03, PNorm = 173.5323, GNorm = 0.1025, lr_0 = 1.0082e-04
Loss = 5.6826e-04, PNorm = 173.5343, GNorm = 0.0438, lr_0 = 1.0075e-04
Loss = 8.2954e-04, PNorm = 173.5362, GNorm = 0.0398, lr_0 = 1.0068e-04
Loss = 2.4007e-03, PNorm = 173.5377, GNorm = 0.0994, lr_0 = 1.0061e-04
Loss = 7.6246e-04, PNorm = 173.5387, GNorm = 0.1860, lr_0 = 1.0054e-04
Loss = 1.7325e-03, PNorm = 173.5399, GNorm = 0.0302, lr_0 = 1.0047e-04
Loss = 3.0419e-03, PNorm = 173.5405, GNorm = 0.4220, lr_0 = 1.0041e-04
Loss = 2.8624e-03, PNorm = 173.5409, GNorm = 0.1673, lr_0 = 1.0034e-04
Loss = 1.5735e-03, PNorm = 173.5412, GNorm = 0.0290, lr_0 = 1.0027e-04
Loss = 2.6111e-03, PNorm = 173.5409, GNorm = 0.1964, lr_0 = 1.0020e-04
Loss = 1.3593e-03, PNorm = 173.5402, GNorm = 0.0957, lr_0 = 1.0013e-04
Loss = 1.7289e-03, PNorm = 173.5409, GNorm = 0.0431, lr_0 = 1.0006e-04
Loss = 8.5883e-04, PNorm = 173.5428, GNorm = 0.0331, lr_0 = 1.0000e-04
Validation mae = 0.277794
Model 0 best validation mae = 0.277622 on epoch 28
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.273259
Ensemble test mae = 0.273259
Fold 1
Splitting data with seed 1
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=2200, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=2200, out_features=2200, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=2200, out_features=1, bias=True)
  )
)
Number of parameters = 9,352,201
Moving model to cuda
Epoch 0
Loss = 9.9748e-01, PNorm = 65.7655, GNorm = 2.6124, lr_0 = 1.0413e-04
Loss = 7.6385e-01, PNorm = 65.7785, GNorm = 1.7659, lr_0 = 1.0788e-04
Loss = 5.0782e-01, PNorm = 65.7930, GNorm = 2.9441, lr_0 = 1.1163e-04
Loss = 5.1277e-01, PNorm = 65.8052, GNorm = 2.8965, lr_0 = 1.1537e-04
Loss = 5.2401e-01, PNorm = 65.8152, GNorm = 3.4115, lr_0 = 1.1913e-04
Loss = 4.9145e-01, PNorm = 65.8247, GNorm = 3.2538, lr_0 = 1.2287e-04
Loss = 4.4602e-01, PNorm = 65.8345, GNorm = 2.0220, lr_0 = 1.2663e-04
Loss = 4.0233e-01, PNorm = 65.8437, GNorm = 2.8932, lr_0 = 1.3038e-04
Loss = 4.1402e-01, PNorm = 65.8514, GNorm = 3.2181, lr_0 = 1.3413e-04
Loss = 4.0113e-01, PNorm = 65.8599, GNorm = 2.2027, lr_0 = 1.3788e-04
Loss = 3.9218e-01, PNorm = 65.8700, GNorm = 1.9085, lr_0 = 1.4163e-04
Loss = 3.3560e-01, PNorm = 65.8801, GNorm = 1.6762, lr_0 = 1.4537e-04
Loss = 3.6630e-01, PNorm = 65.8906, GNorm = 3.4374, lr_0 = 1.4913e-04
Loss = 3.8413e-01, PNorm = 65.9008, GNorm = 2.3051, lr_0 = 1.5288e-04
Loss = 3.5029e-01, PNorm = 65.9118, GNorm = 1.7894, lr_0 = 1.5662e-04
Loss = 3.5980e-01, PNorm = 65.9250, GNorm = 1.8641, lr_0 = 1.6038e-04
Loss = 3.5269e-01, PNorm = 65.9361, GNorm = 2.4165, lr_0 = 1.6412e-04
Loss = 3.5295e-01, PNorm = 65.9490, GNorm = 2.0472, lr_0 = 1.6788e-04
Loss = 3.8174e-01, PNorm = 65.9617, GNorm = 1.7045, lr_0 = 1.7163e-04
Loss = 3.8444e-01, PNorm = 65.9737, GNorm = 1.8446, lr_0 = 1.7538e-04
Loss = 3.5903e-01, PNorm = 65.9878, GNorm = 1.7734, lr_0 = 1.7913e-04
Loss = 3.3408e-01, PNorm = 66.0024, GNorm = 2.1140, lr_0 = 1.8288e-04
Loss = 3.8299e-01, PNorm = 66.0141, GNorm = 1.9297, lr_0 = 1.8662e-04
Loss = 3.0415e-01, PNorm = 66.0279, GNorm = 2.7012, lr_0 = 1.9038e-04
Loss = 3.8516e-01, PNorm = 66.0444, GNorm = 1.5455, lr_0 = 1.9413e-04
Loss = 3.4361e-01, PNorm = 66.0593, GNorm = 1.8639, lr_0 = 1.9788e-04
Loss = 3.4936e-01, PNorm = 66.0730, GNorm = 1.3633, lr_0 = 2.0163e-04
Loss = 3.3133e-01, PNorm = 66.0871, GNorm = 1.9470, lr_0 = 2.0537e-04
Loss = 3.2830e-01, PNorm = 66.1033, GNorm = 1.2010, lr_0 = 2.0913e-04
Loss = 3.0328e-01, PNorm = 66.1171, GNorm = 2.1740, lr_0 = 2.1288e-04
Loss = 3.0883e-01, PNorm = 66.1305, GNorm = 2.4466, lr_0 = 2.1663e-04
Loss = 3.1687e-01, PNorm = 66.1479, GNorm = 1.5166, lr_0 = 2.2038e-04
Loss = 2.9968e-01, PNorm = 66.1658, GNorm = 1.6277, lr_0 = 2.2412e-04
Loss = 3.0425e-01, PNorm = 66.1823, GNorm = 1.1598, lr_0 = 2.2787e-04
Loss = 3.4402e-01, PNorm = 66.2018, GNorm = 1.6200, lr_0 = 2.3163e-04
Loss = 2.9697e-01, PNorm = 66.2199, GNorm = 1.7139, lr_0 = 2.3538e-04
Loss = 2.8895e-01, PNorm = 66.2382, GNorm = 1.3200, lr_0 = 2.3913e-04
Loss = 3.4336e-01, PNorm = 66.2591, GNorm = 1.6162, lr_0 = 2.4288e-04
Loss = 3.1408e-01, PNorm = 66.2773, GNorm = 1.1817, lr_0 = 2.4662e-04
Loss = 3.1715e-01, PNorm = 66.2968, GNorm = 2.2209, lr_0 = 2.5038e-04
Loss = 2.8201e-01, PNorm = 66.3174, GNorm = 1.8843, lr_0 = 2.5413e-04
Loss = 3.0369e-01, PNorm = 66.3366, GNorm = 1.5855, lr_0 = 2.5788e-04
Loss = 3.0178e-01, PNorm = 66.3572, GNorm = 1.4861, lr_0 = 2.6163e-04
Loss = 2.8777e-01, PNorm = 66.3779, GNorm = 1.1414, lr_0 = 2.6537e-04
Loss = 3.2475e-01, PNorm = 66.4002, GNorm = 1.2994, lr_0 = 2.6912e-04
Loss = 2.8259e-01, PNorm = 66.4263, GNorm = 1.3729, lr_0 = 2.7288e-04
Loss = 2.7919e-01, PNorm = 66.4431, GNorm = 0.9961, lr_0 = 2.7663e-04
Loss = 3.3044e-01, PNorm = 66.4704, GNorm = 2.1684, lr_0 = 2.8038e-04
Loss = 2.5742e-01, PNorm = 66.4941, GNorm = 1.7175, lr_0 = 2.8413e-04
Loss = 3.3596e-01, PNorm = 66.5143, GNorm = 1.3709, lr_0 = 2.8787e-04
Loss = 3.0794e-01, PNorm = 66.5415, GNorm = 1.2649, lr_0 = 2.9163e-04
Loss = 2.7229e-01, PNorm = 66.5684, GNorm = 1.1702, lr_0 = 2.9538e-04
Loss = 2.9048e-01, PNorm = 66.5899, GNorm = 1.0776, lr_0 = 2.9913e-04
Loss = 2.6783e-01, PNorm = 66.6168, GNorm = 1.0331, lr_0 = 3.0288e-04
Loss = 3.0327e-01, PNorm = 66.6411, GNorm = 2.2832, lr_0 = 3.0662e-04
Loss = 2.9140e-01, PNorm = 66.6656, GNorm = 1.2312, lr_0 = 3.1037e-04
Loss = 2.8613e-01, PNorm = 66.6946, GNorm = 1.7148, lr_0 = 3.1413e-04
Loss = 2.9944e-01, PNorm = 66.7218, GNorm = 1.2922, lr_0 = 3.1788e-04
Loss = 2.8211e-01, PNorm = 66.7527, GNorm = 1.4742, lr_0 = 3.2163e-04
Loss = 2.9210e-01, PNorm = 66.7765, GNorm = 1.2100, lr_0 = 3.2538e-04
Loss = 3.0583e-01, PNorm = 66.8119, GNorm = 1.2656, lr_0 = 3.2912e-04
Loss = 2.4448e-01, PNorm = 66.8384, GNorm = 1.1723, lr_0 = 3.3288e-04
Loss = 2.7898e-01, PNorm = 66.8640, GNorm = 1.6087, lr_0 = 3.3663e-04
Loss = 2.5100e-01, PNorm = 66.8971, GNorm = 1.1798, lr_0 = 3.4038e-04
Loss = 3.0159e-01, PNorm = 66.9228, GNorm = 1.3339, lr_0 = 3.4413e-04
Loss = 2.7036e-01, PNorm = 66.9557, GNorm = 1.2721, lr_0 = 3.4787e-04
Loss = 2.7687e-01, PNorm = 66.9935, GNorm = 1.0248, lr_0 = 3.5162e-04
Loss = 2.4880e-01, PNorm = 67.0246, GNorm = 1.2707, lr_0 = 3.5538e-04
Loss = 2.9718e-01, PNorm = 67.0575, GNorm = 1.5401, lr_0 = 3.5913e-04
Loss = 2.8966e-01, PNorm = 67.0945, GNorm = 1.1640, lr_0 = 3.6288e-04
Loss = 2.4978e-01, PNorm = 67.1334, GNorm = 1.1507, lr_0 = 3.6662e-04
Loss = 3.1626e-01, PNorm = 67.1685, GNorm = 1.5759, lr_0 = 3.7037e-04
Loss = 2.2518e-01, PNorm = 67.2072, GNorm = 1.0135, lr_0 = 3.7413e-04
Loss = 2.8109e-01, PNorm = 67.2419, GNorm = 1.3583, lr_0 = 3.7788e-04
Loss = 2.9488e-01, PNorm = 67.2799, GNorm = 1.2300, lr_0 = 3.8163e-04
Loss = 2.9693e-01, PNorm = 67.3175, GNorm = 1.2345, lr_0 = 3.8537e-04
Loss = 2.7156e-01, PNorm = 67.3571, GNorm = 1.2446, lr_0 = 3.8912e-04
Loss = 2.6960e-01, PNorm = 67.3935, GNorm = 1.0960, lr_0 = 3.9287e-04
Loss = 2.8427e-01, PNorm = 67.4297, GNorm = 0.9164, lr_0 = 3.9663e-04
Loss = 2.4782e-01, PNorm = 67.4668, GNorm = 1.1232, lr_0 = 4.0038e-04
Loss = 2.7719e-01, PNorm = 67.5030, GNorm = 1.5079, lr_0 = 4.0413e-04
Loss = 2.4717e-01, PNorm = 67.5489, GNorm = 0.7114, lr_0 = 4.0787e-04
Loss = 2.6226e-01, PNorm = 67.5900, GNorm = 1.2746, lr_0 = 4.1162e-04
Loss = 2.5175e-01, PNorm = 67.6305, GNorm = 1.2415, lr_0 = 4.1537e-04
Loss = 2.5025e-01, PNorm = 67.6717, GNorm = 1.1831, lr_0 = 4.1913e-04
Loss = 2.6655e-01, PNorm = 67.7059, GNorm = 1.1399, lr_0 = 4.2288e-04
Loss = 2.5174e-01, PNorm = 67.7497, GNorm = 1.2844, lr_0 = 4.2662e-04
Loss = 3.3212e-01, PNorm = 67.7931, GNorm = 1.0130, lr_0 = 4.3037e-04
Loss = 3.1808e-01, PNorm = 67.8426, GNorm = 1.1568, lr_0 = 4.3412e-04
Loss = 2.3338e-01, PNorm = 67.8932, GNorm = 1.1004, lr_0 = 4.3788e-04
Loss = 2.7239e-01, PNorm = 67.9398, GNorm = 1.2054, lr_0 = 4.4163e-04
Loss = 2.5559e-01, PNorm = 67.9866, GNorm = 1.1288, lr_0 = 4.4538e-04
Loss = 2.5998e-01, PNorm = 68.0322, GNorm = 0.9514, lr_0 = 4.4912e-04
Loss = 2.5479e-01, PNorm = 68.0773, GNorm = 0.7763, lr_0 = 4.5287e-04
Loss = 2.6230e-01, PNorm = 68.1274, GNorm = 1.4205, lr_0 = 4.5662e-04
Loss = 2.4014e-01, PNorm = 68.1760, GNorm = 0.9700, lr_0 = 4.6038e-04
Loss = 2.6576e-01, PNorm = 68.2183, GNorm = 0.9774, lr_0 = 4.6413e-04
Loss = 2.5622e-01, PNorm = 68.2752, GNorm = 1.0949, lr_0 = 4.6787e-04
Loss = 2.5782e-01, PNorm = 68.3241, GNorm = 0.9485, lr_0 = 4.7162e-04
Loss = 2.4206e-01, PNorm = 68.3711, GNorm = 0.9495, lr_0 = 4.7537e-04
Loss = 2.3698e-01, PNorm = 68.4197, GNorm = 0.9156, lr_0 = 4.7913e-04
Loss = 3.0473e-01, PNorm = 68.4710, GNorm = 1.0042, lr_0 = 4.8288e-04
Loss = 2.6528e-01, PNorm = 68.5266, GNorm = 1.0214, lr_0 = 4.8663e-04
Loss = 2.5448e-01, PNorm = 68.5835, GNorm = 0.8654, lr_0 = 4.9038e-04
Loss = 2.4660e-01, PNorm = 68.6398, GNorm = 1.4450, lr_0 = 4.9412e-04
Loss = 2.9682e-01, PNorm = 68.6907, GNorm = 1.0712, lr_0 = 4.9788e-04
Loss = 2.6176e-01, PNorm = 68.7532, GNorm = 0.9317, lr_0 = 5.0163e-04
Loss = 2.4712e-01, PNorm = 68.8072, GNorm = 1.3128, lr_0 = 5.0538e-04
Loss = 2.5646e-01, PNorm = 68.8637, GNorm = 1.1775, lr_0 = 5.0913e-04
Loss = 2.5580e-01, PNorm = 68.9215, GNorm = 1.2357, lr_0 = 5.1287e-04
Loss = 2.5207e-01, PNorm = 68.9820, GNorm = 1.4809, lr_0 = 5.1663e-04
Loss = 2.2722e-01, PNorm = 69.0382, GNorm = 0.8412, lr_0 = 5.2038e-04
Loss = 2.7560e-01, PNorm = 69.0943, GNorm = 1.2105, lr_0 = 5.2413e-04
Loss = 2.7840e-01, PNorm = 69.1570, GNorm = 1.0282, lr_0 = 5.2788e-04
Loss = 2.6052e-01, PNorm = 69.2201, GNorm = 0.8568, lr_0 = 5.3162e-04
Loss = 2.4585e-01, PNorm = 69.2852, GNorm = 0.8342, lr_0 = 5.3538e-04
Loss = 2.3503e-01, PNorm = 69.3439, GNorm = 0.8288, lr_0 = 5.3912e-04
Loss = 2.5061e-01, PNorm = 69.4015, GNorm = 1.1279, lr_0 = 5.4288e-04
Loss = 2.6968e-01, PNorm = 69.4668, GNorm = 1.6058, lr_0 = 5.4663e-04
Loss = 2.7308e-01, PNorm = 69.5304, GNorm = 0.9763, lr_0 = 5.5038e-04
Validation mae = 0.315607
Epoch 1
Loss = 1.6417e-01, PNorm = 69.5966, GNorm = 0.8970, lr_0 = 5.5413e-04
Loss = 1.8823e-01, PNorm = 69.6613, GNorm = 1.1174, lr_0 = 5.5787e-04
Loss = 1.7698e-01, PNorm = 69.7168, GNorm = 0.7674, lr_0 = 5.6163e-04
Loss = 1.6410e-01, PNorm = 69.7757, GNorm = 0.6893, lr_0 = 5.6538e-04
Loss = 1.7416e-01, PNorm = 69.8290, GNorm = 0.7165, lr_0 = 5.6913e-04
Loss = 1.7030e-01, PNorm = 69.8957, GNorm = 0.7710, lr_0 = 5.7288e-04
Loss = 1.9663e-01, PNorm = 69.9515, GNorm = 0.9629, lr_0 = 5.7662e-04
Loss = 1.9287e-01, PNorm = 70.0191, GNorm = 0.9834, lr_0 = 5.8038e-04
Loss = 1.5958e-01, PNorm = 70.0789, GNorm = 0.6899, lr_0 = 5.8413e-04
Loss = 1.7846e-01, PNorm = 70.1474, GNorm = 0.7524, lr_0 = 5.8788e-04
Loss = 1.3880e-01, PNorm = 70.2100, GNorm = 0.5773, lr_0 = 5.9163e-04
Loss = 1.8348e-01, PNorm = 70.2748, GNorm = 0.9509, lr_0 = 5.9538e-04
Loss = 1.7460e-01, PNorm = 70.3517, GNorm = 0.9677, lr_0 = 5.9913e-04
Loss = 1.7343e-01, PNorm = 70.4245, GNorm = 1.2552, lr_0 = 6.0288e-04
Loss = 1.9153e-01, PNorm = 70.5047, GNorm = 0.9287, lr_0 = 6.0663e-04
Loss = 1.9492e-01, PNorm = 70.5915, GNorm = 1.3194, lr_0 = 6.1038e-04
Loss = 1.6568e-01, PNorm = 70.6746, GNorm = 0.7614, lr_0 = 6.1413e-04
Loss = 1.8660e-01, PNorm = 70.7475, GNorm = 1.0692, lr_0 = 6.1788e-04
Loss = 1.8566e-01, PNorm = 70.8358, GNorm = 1.1041, lr_0 = 6.2163e-04
Loss = 1.8357e-01, PNorm = 70.9096, GNorm = 0.7477, lr_0 = 6.2538e-04
Loss = 1.8036e-01, PNorm = 71.0004, GNorm = 0.7988, lr_0 = 6.2913e-04
Loss = 1.8163e-01, PNorm = 71.0814, GNorm = 0.9436, lr_0 = 6.3288e-04
Loss = 1.5954e-01, PNorm = 71.1584, GNorm = 0.4967, lr_0 = 6.3663e-04
Loss = 1.9878e-01, PNorm = 71.2361, GNorm = 1.2673, lr_0 = 6.4038e-04
Loss = 1.7760e-01, PNorm = 71.3174, GNorm = 1.2194, lr_0 = 6.4413e-04
Loss = 1.7511e-01, PNorm = 71.4034, GNorm = 0.9417, lr_0 = 6.4788e-04
Loss = 1.7509e-01, PNorm = 71.4915, GNorm = 0.9086, lr_0 = 6.5163e-04
Loss = 1.8105e-01, PNorm = 71.5724, GNorm = 0.7899, lr_0 = 6.5538e-04
Loss = 1.7135e-01, PNorm = 71.6622, GNorm = 0.9824, lr_0 = 6.5913e-04
Loss = 1.9643e-01, PNorm = 71.7497, GNorm = 1.0044, lr_0 = 6.6288e-04
Loss = 1.9810e-01, PNorm = 71.8497, GNorm = 0.8681, lr_0 = 6.6663e-04
Loss = 1.9878e-01, PNorm = 71.9447, GNorm = 1.0200, lr_0 = 6.7038e-04
Loss = 1.8759e-01, PNorm = 72.0412, GNorm = 1.1657, lr_0 = 6.7413e-04
Loss = 1.8686e-01, PNorm = 72.1383, GNorm = 1.2138, lr_0 = 6.7788e-04
Loss = 2.0308e-01, PNorm = 72.2354, GNorm = 1.1861, lr_0 = 6.8163e-04
Loss = 1.9953e-01, PNorm = 72.3363, GNorm = 0.8873, lr_0 = 6.8538e-04
Loss = 2.2129e-01, PNorm = 72.4412, GNorm = 0.9373, lr_0 = 6.8913e-04
Loss = 1.9920e-01, PNorm = 72.5424, GNorm = 0.8546, lr_0 = 6.9288e-04
Loss = 1.8513e-01, PNorm = 72.6537, GNorm = 0.7382, lr_0 = 6.9663e-04
Loss = 1.9906e-01, PNorm = 72.7515, GNorm = 0.9768, lr_0 = 7.0038e-04
Loss = 2.0258e-01, PNorm = 72.8623, GNorm = 0.8472, lr_0 = 7.0413e-04
Loss = 1.8503e-01, PNorm = 72.9646, GNorm = 0.9358, lr_0 = 7.0788e-04
Loss = 1.7751e-01, PNorm = 73.0729, GNorm = 0.7584, lr_0 = 7.1163e-04
Loss = 2.0067e-01, PNorm = 73.1809, GNorm = 1.1667, lr_0 = 7.1538e-04
Loss = 1.6240e-01, PNorm = 73.2874, GNorm = 0.6155, lr_0 = 7.1913e-04
Loss = 1.9730e-01, PNorm = 73.3976, GNorm = 1.6729, lr_0 = 7.2288e-04
Loss = 1.9504e-01, PNorm = 73.4990, GNorm = 0.7759, lr_0 = 7.2663e-04
Loss = 1.8435e-01, PNorm = 73.6047, GNorm = 0.9664, lr_0 = 7.3038e-04
Loss = 1.8408e-01, PNorm = 73.7041, GNorm = 1.0522, lr_0 = 7.3413e-04
Loss = 2.0921e-01, PNorm = 73.8159, GNorm = 0.9971, lr_0 = 7.3788e-04
Loss = 1.9604e-01, PNorm = 73.9245, GNorm = 0.8837, lr_0 = 7.4163e-04
Loss = 1.8531e-01, PNorm = 74.0325, GNorm = 0.7134, lr_0 = 7.4538e-04
Loss = 1.9573e-01, PNorm = 74.1366, GNorm = 0.9594, lr_0 = 7.4913e-04
Loss = 2.1425e-01, PNorm = 74.2523, GNorm = 0.6953, lr_0 = 7.5288e-04
Loss = 2.3770e-01, PNorm = 74.3807, GNorm = 0.7494, lr_0 = 7.5663e-04
Loss = 2.0312e-01, PNorm = 74.5055, GNorm = 0.9760, lr_0 = 7.6038e-04
Loss = 2.0662e-01, PNorm = 74.6403, GNorm = 1.1065, lr_0 = 7.6413e-04
Loss = 1.7706e-01, PNorm = 74.7649, GNorm = 0.9769, lr_0 = 7.6788e-04
Loss = 2.0179e-01, PNorm = 74.8880, GNorm = 0.7222, lr_0 = 7.7163e-04
Loss = 1.8659e-01, PNorm = 75.0041, GNorm = 0.6892, lr_0 = 7.7538e-04
Loss = 2.4053e-01, PNorm = 75.1347, GNorm = 0.8578, lr_0 = 7.7913e-04
Loss = 2.0951e-01, PNorm = 75.2631, GNorm = 0.7665, lr_0 = 7.8288e-04
Loss = 2.1236e-01, PNorm = 75.3976, GNorm = 0.8712, lr_0 = 7.8663e-04
Loss = 2.1013e-01, PNorm = 75.5202, GNorm = 1.1697, lr_0 = 7.9038e-04
Loss = 1.8917e-01, PNorm = 75.6428, GNorm = 1.0743, lr_0 = 7.9413e-04
Loss = 2.1935e-01, PNorm = 75.7620, GNorm = 1.0546, lr_0 = 7.9788e-04
Loss = 1.9468e-01, PNorm = 75.8920, GNorm = 0.6774, lr_0 = 8.0163e-04
Loss = 1.9246e-01, PNorm = 76.0102, GNorm = 0.7626, lr_0 = 8.0538e-04
Loss = 1.8435e-01, PNorm = 76.1246, GNorm = 0.6602, lr_0 = 8.0913e-04
Loss = 2.1534e-01, PNorm = 76.2354, GNorm = 0.9918, lr_0 = 8.1288e-04
Loss = 2.0891e-01, PNorm = 76.3610, GNorm = 0.7351, lr_0 = 8.1663e-04
Loss = 2.1538e-01, PNorm = 76.4826, GNorm = 0.7356, lr_0 = 8.2038e-04
Loss = 2.4476e-01, PNorm = 76.6209, GNorm = 1.1269, lr_0 = 8.2413e-04
Loss = 2.0974e-01, PNorm = 76.7565, GNorm = 0.6925, lr_0 = 8.2788e-04
Loss = 1.7849e-01, PNorm = 76.8956, GNorm = 0.9449, lr_0 = 8.3163e-04
Loss = 1.9298e-01, PNorm = 77.0148, GNorm = 0.9488, lr_0 = 8.3538e-04
Loss = 1.8913e-01, PNorm = 77.1378, GNorm = 0.8502, lr_0 = 8.3913e-04
Loss = 2.1552e-01, PNorm = 77.2567, GNorm = 1.1107, lr_0 = 8.4288e-04
Loss = 1.8844e-01, PNorm = 77.3938, GNorm = 1.0406, lr_0 = 8.4663e-04
Loss = 1.8620e-01, PNorm = 77.5188, GNorm = 0.8613, lr_0 = 8.5038e-04
Loss = 1.9475e-01, PNorm = 77.6449, GNorm = 0.7619, lr_0 = 8.5413e-04
Loss = 1.9007e-01, PNorm = 77.7629, GNorm = 0.8912, lr_0 = 8.5788e-04
Loss = 2.0888e-01, PNorm = 77.8930, GNorm = 0.9224, lr_0 = 8.6163e-04
Loss = 1.9988e-01, PNorm = 78.0151, GNorm = 0.8107, lr_0 = 8.6538e-04
Loss = 1.9216e-01, PNorm = 78.1487, GNorm = 0.8673, lr_0 = 8.6913e-04
Loss = 2.1051e-01, PNorm = 78.2850, GNorm = 1.1508, lr_0 = 8.7288e-04
Loss = 2.0206e-01, PNorm = 78.4215, GNorm = 0.7248, lr_0 = 8.7663e-04
Loss = 2.2982e-01, PNorm = 78.5630, GNorm = 0.9116, lr_0 = 8.8038e-04
Loss = 1.8865e-01, PNorm = 78.6937, GNorm = 0.8797, lr_0 = 8.8413e-04
Loss = 2.1644e-01, PNorm = 78.8180, GNorm = 1.1014, lr_0 = 8.8788e-04
Loss = 2.3597e-01, PNorm = 78.9651, GNorm = 1.5810, lr_0 = 8.9163e-04
Loss = 1.9997e-01, PNorm = 79.1017, GNorm = 0.9820, lr_0 = 8.9538e-04
Loss = 2.0605e-01, PNorm = 79.2402, GNorm = 1.2048, lr_0 = 8.9913e-04
Loss = 2.1595e-01, PNorm = 79.3841, GNorm = 0.6833, lr_0 = 9.0288e-04
Loss = 2.2774e-01, PNorm = 79.5179, GNorm = 1.0023, lr_0 = 9.0663e-04
Loss = 1.9462e-01, PNorm = 79.6512, GNorm = 0.8595, lr_0 = 9.1038e-04
Loss = 1.7676e-01, PNorm = 79.7799, GNorm = 0.9125, lr_0 = 9.1413e-04
Loss = 2.0434e-01, PNorm = 79.9056, GNorm = 0.6374, lr_0 = 9.1788e-04
Loss = 2.2155e-01, PNorm = 80.0367, GNorm = 0.7166, lr_0 = 9.2163e-04
Loss = 1.9629e-01, PNorm = 80.1842, GNorm = 1.0705, lr_0 = 9.2538e-04
Loss = 2.3813e-01, PNorm = 80.3236, GNorm = 0.7032, lr_0 = 9.2913e-04
Loss = 2.0137e-01, PNorm = 80.4719, GNorm = 0.7484, lr_0 = 9.3288e-04
Loss = 2.2898e-01, PNorm = 80.6171, GNorm = 1.0787, lr_0 = 9.3663e-04
Loss = 2.0851e-01, PNorm = 80.7689, GNorm = 0.6197, lr_0 = 9.4038e-04
Loss = 1.9597e-01, PNorm = 80.9204, GNorm = 0.6829, lr_0 = 9.4413e-04
Loss = 2.0923e-01, PNorm = 81.0658, GNorm = 0.8270, lr_0 = 9.4788e-04
Loss = 1.8214e-01, PNorm = 81.2137, GNorm = 0.7580, lr_0 = 9.5163e-04
Loss = 2.2876e-01, PNorm = 81.3566, GNorm = 1.3932, lr_0 = 9.5538e-04
Loss = 2.3072e-01, PNorm = 81.5039, GNorm = 0.6970, lr_0 = 9.5913e-04
Loss = 1.8714e-01, PNorm = 81.6660, GNorm = 0.7360, lr_0 = 9.6288e-04
Loss = 1.9762e-01, PNorm = 81.8172, GNorm = 0.7840, lr_0 = 9.6663e-04
Loss = 2.3390e-01, PNorm = 81.9690, GNorm = 0.8484, lr_0 = 9.7038e-04
Loss = 2.0304e-01, PNorm = 82.1332, GNorm = 0.5955, lr_0 = 9.7413e-04
Loss = 2.3418e-01, PNorm = 82.2806, GNorm = 0.8220, lr_0 = 9.7788e-04
Loss = 2.2580e-01, PNorm = 82.4345, GNorm = 1.2370, lr_0 = 9.8163e-04
Loss = 2.0339e-01, PNorm = 82.5976, GNorm = 0.6636, lr_0 = 9.8537e-04
Loss = 2.0879e-01, PNorm = 82.7457, GNorm = 0.7147, lr_0 = 9.8912e-04
Loss = 2.2952e-01, PNorm = 82.8931, GNorm = 0.8126, lr_0 = 9.9288e-04
Loss = 1.9677e-01, PNorm = 83.0551, GNorm = 0.5712, lr_0 = 9.9663e-04
Loss = 2.1258e-01, PNorm = 83.2149, GNorm = 0.6405, lr_0 = 9.9993e-04
Validation mae = 0.315606
Epoch 2
Loss = 1.4850e-01, PNorm = 83.3820, GNorm = 1.0730, lr_0 = 9.9925e-04
Loss = 1.3501e-01, PNorm = 83.5123, GNorm = 0.9175, lr_0 = 9.9856e-04
Loss = 1.4021e-01, PNorm = 83.6372, GNorm = 0.6360, lr_0 = 9.9788e-04
Loss = 1.3047e-01, PNorm = 83.7666, GNorm = 0.5067, lr_0 = 9.9719e-04
Loss = 1.6018e-01, PNorm = 83.8903, GNorm = 0.5002, lr_0 = 9.9651e-04
Loss = 1.3004e-01, PNorm = 84.0259, GNorm = 0.5629, lr_0 = 9.9583e-04
Loss = 1.2708e-01, PNorm = 84.1322, GNorm = 0.6801, lr_0 = 9.9515e-04
Loss = 1.2875e-01, PNorm = 84.2466, GNorm = 1.1131, lr_0 = 9.9446e-04
Loss = 1.2958e-01, PNorm = 84.3571, GNorm = 0.5604, lr_0 = 9.9378e-04
Loss = 1.3597e-01, PNorm = 84.4628, GNorm = 0.5895, lr_0 = 9.9310e-04
Loss = 1.2747e-01, PNorm = 84.5755, GNorm = 0.6366, lr_0 = 9.9242e-04
Loss = 1.2071e-01, PNorm = 84.6942, GNorm = 0.4937, lr_0 = 9.9174e-04
Loss = 1.3174e-01, PNorm = 84.8051, GNorm = 0.4624, lr_0 = 9.9106e-04
Loss = 1.1813e-01, PNorm = 84.9148, GNorm = 0.4688, lr_0 = 9.9038e-04
Loss = 1.2271e-01, PNorm = 85.0295, GNorm = 0.5352, lr_0 = 9.8971e-04
Loss = 1.2946e-01, PNorm = 85.1404, GNorm = 0.5625, lr_0 = 9.8903e-04
Loss = 1.3029e-01, PNorm = 85.2570, GNorm = 0.6741, lr_0 = 9.8835e-04
Loss = 1.3517e-01, PNorm = 85.3742, GNorm = 0.6863, lr_0 = 9.8767e-04
Loss = 1.2105e-01, PNorm = 85.5011, GNorm = 0.4612, lr_0 = 9.8700e-04
Loss = 1.1542e-01, PNorm = 85.6112, GNorm = 0.3309, lr_0 = 9.8632e-04
Loss = 1.3030e-01, PNorm = 85.7365, GNorm = 0.5815, lr_0 = 9.8564e-04
Loss = 1.4021e-01, PNorm = 85.8505, GNorm = 0.6591, lr_0 = 9.8497e-04
Loss = 1.1378e-01, PNorm = 85.9805, GNorm = 0.5849, lr_0 = 9.8429e-04
Loss = 1.3358e-01, PNorm = 86.1015, GNorm = 0.7876, lr_0 = 9.8362e-04
Loss = 1.3421e-01, PNorm = 86.2413, GNorm = 0.9530, lr_0 = 9.8295e-04
Loss = 1.3931e-01, PNorm = 86.3795, GNorm = 0.4533, lr_0 = 9.8227e-04
Loss = 1.0966e-01, PNorm = 86.5223, GNorm = 0.7294, lr_0 = 9.8160e-04
Loss = 1.4892e-01, PNorm = 86.6407, GNorm = 0.6861, lr_0 = 9.8093e-04
Loss = 1.3068e-01, PNorm = 86.7786, GNorm = 0.5792, lr_0 = 9.8026e-04
Loss = 1.2785e-01, PNorm = 86.9117, GNorm = 0.6536, lr_0 = 9.7958e-04
Loss = 1.3661e-01, PNorm = 87.0401, GNorm = 0.6545, lr_0 = 9.7891e-04
Loss = 1.1357e-01, PNorm = 87.1513, GNorm = 0.5898, lr_0 = 9.7824e-04
Loss = 1.5783e-01, PNorm = 87.2833, GNorm = 0.8500, lr_0 = 9.7757e-04
Loss = 1.3662e-01, PNorm = 87.4162, GNorm = 0.5902, lr_0 = 9.7690e-04
Loss = 1.3208e-01, PNorm = 87.5451, GNorm = 1.1478, lr_0 = 9.7623e-04
Loss = 1.4020e-01, PNorm = 87.6601, GNorm = 0.9249, lr_0 = 9.7556e-04
Loss = 1.1735e-01, PNorm = 87.7830, GNorm = 0.5568, lr_0 = 9.7490e-04
Loss = 1.3255e-01, PNorm = 87.9021, GNorm = 0.7850, lr_0 = 9.7423e-04
Loss = 1.4400e-01, PNorm = 88.0179, GNorm = 0.6029, lr_0 = 9.7356e-04
Loss = 1.2895e-01, PNorm = 88.1523, GNorm = 1.0868, lr_0 = 9.7289e-04
Loss = 1.3319e-01, PNorm = 88.2765, GNorm = 0.5418, lr_0 = 9.7223e-04
Loss = 1.4070e-01, PNorm = 88.4080, GNorm = 0.8811, lr_0 = 9.7156e-04
Loss = 1.4385e-01, PNorm = 88.5340, GNorm = 0.9000, lr_0 = 9.7090e-04
Loss = 1.3333e-01, PNorm = 88.6648, GNorm = 0.6448, lr_0 = 9.7023e-04
Loss = 1.3543e-01, PNorm = 88.8047, GNorm = 0.8371, lr_0 = 9.6957e-04
Loss = 1.5562e-01, PNorm = 88.9473, GNorm = 0.8012, lr_0 = 9.6890e-04
Loss = 1.3759e-01, PNorm = 89.0979, GNorm = 0.6795, lr_0 = 9.6824e-04
Loss = 1.4809e-01, PNorm = 89.2357, GNorm = 0.6226, lr_0 = 9.6757e-04
Loss = 1.2776e-01, PNorm = 89.3774, GNorm = 0.9745, lr_0 = 9.6691e-04
Loss = 1.4016e-01, PNorm = 89.5069, GNorm = 0.7142, lr_0 = 9.6625e-04
Loss = 1.3729e-01, PNorm = 89.6442, GNorm = 0.5220, lr_0 = 9.6559e-04
Loss = 1.2350e-01, PNorm = 89.7609, GNorm = 1.1940, lr_0 = 9.6493e-04
Loss = 1.4155e-01, PNorm = 89.8829, GNorm = 0.4701, lr_0 = 9.6427e-04
Loss = 1.2916e-01, PNorm = 90.0144, GNorm = 1.0026, lr_0 = 9.6360e-04
Loss = 1.7174e-01, PNorm = 90.1455, GNorm = 0.5665, lr_0 = 9.6294e-04
Loss = 1.3820e-01, PNorm = 90.2779, GNorm = 0.5362, lr_0 = 9.6228e-04
Loss = 1.4739e-01, PNorm = 90.4140, GNorm = 0.9448, lr_0 = 9.6163e-04
Loss = 1.3307e-01, PNorm = 90.5555, GNorm = 0.5759, lr_0 = 9.6097e-04
Loss = 1.2926e-01, PNorm = 90.6697, GNorm = 0.7246, lr_0 = 9.6031e-04
Loss = 1.4394e-01, PNorm = 90.8013, GNorm = 0.7104, lr_0 = 9.5965e-04
Loss = 1.2476e-01, PNorm = 90.9194, GNorm = 0.5976, lr_0 = 9.5899e-04
Loss = 1.3163e-01, PNorm = 91.0356, GNorm = 0.9123, lr_0 = 9.5834e-04
Loss = 1.3390e-01, PNorm = 91.1487, GNorm = 0.6817, lr_0 = 9.5768e-04
Loss = 1.1969e-01, PNorm = 91.2659, GNorm = 0.7150, lr_0 = 9.5702e-04
Loss = 1.3709e-01, PNorm = 91.3727, GNorm = 0.6938, lr_0 = 9.5637e-04
Loss = 1.3002e-01, PNorm = 91.4958, GNorm = 0.6581, lr_0 = 9.5571e-04
Loss = 1.4143e-01, PNorm = 91.5968, GNorm = 0.9196, lr_0 = 9.5506e-04
Loss = 1.3782e-01, PNorm = 91.7339, GNorm = 0.9055, lr_0 = 9.5440e-04
Loss = 1.5227e-01, PNorm = 91.8700, GNorm = 1.1233, lr_0 = 9.5375e-04
Loss = 1.4252e-01, PNorm = 92.0012, GNorm = 0.8661, lr_0 = 9.5310e-04
Loss = 1.2979e-01, PNorm = 92.1303, GNorm = 1.3498, lr_0 = 9.5244e-04
Loss = 1.3796e-01, PNorm = 92.2649, GNorm = 0.4737, lr_0 = 9.5179e-04
Loss = 1.6571e-01, PNorm = 92.4015, GNorm = 0.9477, lr_0 = 9.5114e-04
Loss = 1.4061e-01, PNorm = 92.5269, GNorm = 1.0234, lr_0 = 9.5049e-04
Loss = 1.5049e-01, PNorm = 92.6597, GNorm = 1.1335, lr_0 = 9.4984e-04
Loss = 1.4806e-01, PNorm = 92.7894, GNorm = 0.5587, lr_0 = 9.4919e-04
Loss = 1.3970e-01, PNorm = 92.9164, GNorm = 0.8285, lr_0 = 9.4854e-04
Loss = 1.4126e-01, PNorm = 93.0495, GNorm = 0.6444, lr_0 = 9.4789e-04
Loss = 1.3871e-01, PNorm = 93.1652, GNorm = 0.5854, lr_0 = 9.4724e-04
Loss = 1.5248e-01, PNorm = 93.2942, GNorm = 0.6597, lr_0 = 9.4659e-04
Loss = 1.4071e-01, PNorm = 93.4130, GNorm = 1.0743, lr_0 = 9.4594e-04
Loss = 1.6137e-01, PNorm = 93.5624, GNorm = 0.7336, lr_0 = 9.4529e-04
Loss = 1.6407e-01, PNorm = 93.6985, GNorm = 1.5365, lr_0 = 9.4464e-04
Loss = 1.5262e-01, PNorm = 93.8389, GNorm = 0.7106, lr_0 = 9.4400e-04
Loss = 1.3640e-01, PNorm = 93.9819, GNorm = 0.6586, lr_0 = 9.4335e-04
Loss = 1.5958e-01, PNorm = 94.1200, GNorm = 0.7764, lr_0 = 9.4270e-04
Loss = 1.5870e-01, PNorm = 94.2628, GNorm = 0.7380, lr_0 = 9.4206e-04
Loss = 1.4571e-01, PNorm = 94.3923, GNorm = 0.9223, lr_0 = 9.4141e-04
Loss = 1.3555e-01, PNorm = 94.5291, GNorm = 0.6219, lr_0 = 9.4077e-04
Loss = 1.3528e-01, PNorm = 94.6450, GNorm = 0.6864, lr_0 = 9.4012e-04
Loss = 1.2753e-01, PNorm = 94.7586, GNorm = 0.4833, lr_0 = 9.3948e-04
Loss = 1.4021e-01, PNorm = 94.8740, GNorm = 0.6382, lr_0 = 9.3884e-04
Loss = 1.3207e-01, PNorm = 94.9897, GNorm = 0.5277, lr_0 = 9.3819e-04
Loss = 1.3479e-01, PNorm = 95.1114, GNorm = 1.0126, lr_0 = 9.3755e-04
Loss = 1.4256e-01, PNorm = 95.2290, GNorm = 0.5343, lr_0 = 9.3691e-04
Loss = 1.3223e-01, PNorm = 95.3564, GNorm = 0.4429, lr_0 = 9.3627e-04
Loss = 1.3857e-01, PNorm = 95.4639, GNorm = 0.6603, lr_0 = 9.3562e-04
Loss = 1.4469e-01, PNorm = 95.5690, GNorm = 1.2565, lr_0 = 9.3498e-04
Loss = 1.4365e-01, PNorm = 95.6829, GNorm = 0.8077, lr_0 = 9.3434e-04
Loss = 1.4513e-01, PNorm = 95.8010, GNorm = 0.9060, lr_0 = 9.3370e-04
Loss = 1.4674e-01, PNorm = 95.9167, GNorm = 0.8789, lr_0 = 9.3306e-04
Loss = 1.4231e-01, PNorm = 96.0485, GNorm = 0.7320, lr_0 = 9.3242e-04
Loss = 1.3576e-01, PNorm = 96.1633, GNorm = 0.7407, lr_0 = 9.3178e-04
Loss = 1.3651e-01, PNorm = 96.2793, GNorm = 0.4321, lr_0 = 9.3115e-04
Loss = 1.4686e-01, PNorm = 96.3870, GNorm = 0.7665, lr_0 = 9.3051e-04
Loss = 1.5799e-01, PNorm = 96.5017, GNorm = 0.5545, lr_0 = 9.2987e-04
Loss = 1.4701e-01, PNorm = 96.6324, GNorm = 1.1514, lr_0 = 9.2923e-04
Loss = 1.5920e-01, PNorm = 96.7636, GNorm = 0.7379, lr_0 = 9.2860e-04
Loss = 1.5677e-01, PNorm = 96.9159, GNorm = 0.6179, lr_0 = 9.2796e-04
Loss = 1.4689e-01, PNorm = 97.0341, GNorm = 0.5999, lr_0 = 9.2733e-04
Loss = 1.3896e-01, PNorm = 97.1665, GNorm = 0.4757, lr_0 = 9.2669e-04
Loss = 1.3682e-01, PNorm = 97.2836, GNorm = 0.8604, lr_0 = 9.2606e-04
Loss = 1.4886e-01, PNorm = 97.4071, GNorm = 1.3726, lr_0 = 9.2542e-04
Loss = 1.5178e-01, PNorm = 97.5336, GNorm = 0.8068, lr_0 = 9.2479e-04
Loss = 1.4084e-01, PNorm = 97.6742, GNorm = 0.7203, lr_0 = 9.2415e-04
Loss = 1.2712e-01, PNorm = 97.8004, GNorm = 0.7903, lr_0 = 9.2352e-04
Loss = 1.4822e-01, PNorm = 97.9275, GNorm = 0.6324, lr_0 = 9.2289e-04
Loss = 1.6635e-01, PNorm = 98.0531, GNorm = 0.7697, lr_0 = 9.2226e-04
Loss = 1.3907e-01, PNorm = 98.1748, GNorm = 0.9528, lr_0 = 9.2162e-04
Loss = 1.4020e-01, PNorm = 98.3001, GNorm = 0.6540, lr_0 = 9.2099e-04
Validation mae = 0.295338
Epoch 3
Loss = 8.3066e-02, PNorm = 98.4012, GNorm = 0.8912, lr_0 = 9.2036e-04
Loss = 8.1626e-02, PNorm = 98.4958, GNorm = 0.4007, lr_0 = 9.1973e-04
Loss = 9.2561e-02, PNorm = 98.5729, GNorm = 0.5929, lr_0 = 9.1910e-04
Loss = 8.0462e-02, PNorm = 98.6489, GNorm = 0.5012, lr_0 = 9.1847e-04
Loss = 7.5251e-02, PNorm = 98.7209, GNorm = 0.4264, lr_0 = 9.1784e-04
Loss = 7.5595e-02, PNorm = 98.7854, GNorm = 0.4330, lr_0 = 9.1721e-04
Loss = 9.3486e-02, PNorm = 98.8673, GNorm = 0.8112, lr_0 = 9.1658e-04
Loss = 8.8961e-02, PNorm = 98.9364, GNorm = 0.4685, lr_0 = 9.1596e-04
Loss = 8.3786e-02, PNorm = 99.0290, GNorm = 0.4838, lr_0 = 9.1533e-04
Loss = 7.8687e-02, PNorm = 99.1046, GNorm = 0.8711, lr_0 = 9.1470e-04
Loss = 7.6433e-02, PNorm = 99.1800, GNorm = 0.5276, lr_0 = 9.1408e-04
Loss = 7.8691e-02, PNorm = 99.2558, GNorm = 0.5685, lr_0 = 9.1345e-04
Loss = 8.2980e-02, PNorm = 99.3458, GNorm = 0.6117, lr_0 = 9.1282e-04
Loss = 7.3612e-02, PNorm = 99.4284, GNorm = 0.3756, lr_0 = 9.1220e-04
Loss = 6.7372e-02, PNorm = 99.5082, GNorm = 0.2862, lr_0 = 9.1157e-04
Loss = 7.2512e-02, PNorm = 99.5860, GNorm = 0.5734, lr_0 = 9.1095e-04
Loss = 8.2735e-02, PNorm = 99.6556, GNorm = 0.9049, lr_0 = 9.1032e-04
Loss = 7.3356e-02, PNorm = 99.7394, GNorm = 0.7576, lr_0 = 9.0970e-04
Loss = 7.1105e-02, PNorm = 99.8161, GNorm = 0.7470, lr_0 = 9.0908e-04
Loss = 6.7441e-02, PNorm = 99.8830, GNorm = 0.3811, lr_0 = 9.0846e-04
Loss = 7.8216e-02, PNorm = 99.9551, GNorm = 0.3813, lr_0 = 9.0783e-04
Loss = 9.1588e-02, PNorm = 100.0388, GNorm = 0.4393, lr_0 = 9.0721e-04
Loss = 7.0981e-02, PNorm = 100.1219, GNorm = 0.4097, lr_0 = 9.0659e-04
Loss = 8.7393e-02, PNorm = 100.2073, GNorm = 0.9019, lr_0 = 9.0597e-04
Loss = 7.9091e-02, PNorm = 100.2916, GNorm = 0.7502, lr_0 = 9.0535e-04
Loss = 6.8675e-02, PNorm = 100.3772, GNorm = 0.4276, lr_0 = 9.0473e-04
Loss = 8.0498e-02, PNorm = 100.4638, GNorm = 0.4378, lr_0 = 9.0411e-04
Loss = 7.6441e-02, PNorm = 100.5468, GNorm = 0.4194, lr_0 = 9.0349e-04
Loss = 8.5459e-02, PNorm = 100.6366, GNorm = 0.3925, lr_0 = 9.0287e-04
Loss = 8.4029e-02, PNorm = 100.7250, GNorm = 0.4953, lr_0 = 9.0225e-04
Loss = 7.9406e-02, PNorm = 100.8158, GNorm = 0.4010, lr_0 = 9.0163e-04
Loss = 9.4527e-02, PNorm = 100.9027, GNorm = 0.4407, lr_0 = 9.0102e-04
Loss = 8.3814e-02, PNorm = 101.0063, GNorm = 0.5417, lr_0 = 9.0040e-04
Loss = 8.6206e-02, PNorm = 101.1045, GNorm = 0.4270, lr_0 = 8.9978e-04
Loss = 6.9021e-02, PNorm = 101.2064, GNorm = 0.9975, lr_0 = 8.9916e-04
Loss = 8.6318e-02, PNorm = 101.2895, GNorm = 0.4676, lr_0 = 8.9855e-04
Loss = 8.2925e-02, PNorm = 101.3911, GNorm = 0.5680, lr_0 = 8.9793e-04
Loss = 8.3261e-02, PNorm = 101.4858, GNorm = 0.6394, lr_0 = 8.9732e-04
Loss = 7.6899e-02, PNorm = 101.5797, GNorm = 0.4762, lr_0 = 8.9670e-04
Loss = 8.1410e-02, PNorm = 101.6795, GNorm = 0.3676, lr_0 = 8.9609e-04
Loss = 6.6167e-02, PNorm = 101.7685, GNorm = 0.4342, lr_0 = 8.9548e-04
Loss = 7.2665e-02, PNorm = 101.8514, GNorm = 0.4322, lr_0 = 8.9486e-04
Loss = 8.2643e-02, PNorm = 101.9371, GNorm = 1.3420, lr_0 = 8.9425e-04
Loss = 8.0065e-02, PNorm = 102.0228, GNorm = 0.4647, lr_0 = 8.9364e-04
Loss = 8.3325e-02, PNorm = 102.1231, GNorm = 0.6760, lr_0 = 8.9302e-04
Loss = 8.0535e-02, PNorm = 102.2092, GNorm = 0.6344, lr_0 = 8.9241e-04
Loss = 8.2148e-02, PNorm = 102.3052, GNorm = 0.5351, lr_0 = 8.9180e-04
Loss = 9.1103e-02, PNorm = 102.3926, GNorm = 0.6778, lr_0 = 8.9119e-04
Loss = 9.5012e-02, PNorm = 102.5025, GNorm = 0.6846, lr_0 = 8.9058e-04
Loss = 8.7490e-02, PNorm = 102.5941, GNorm = 0.3794, lr_0 = 8.8997e-04
Loss = 7.9913e-02, PNorm = 102.6938, GNorm = 0.4636, lr_0 = 8.8936e-04
Loss = 8.3469e-02, PNorm = 102.7852, GNorm = 0.3505, lr_0 = 8.8875e-04
Loss = 7.9137e-02, PNorm = 102.8802, GNorm = 0.6204, lr_0 = 8.8814e-04
Loss = 8.3355e-02, PNorm = 102.9709, GNorm = 0.3745, lr_0 = 8.8753e-04
Loss = 7.1636e-02, PNorm = 103.0584, GNorm = 0.9704, lr_0 = 8.8693e-04
Loss = 8.0438e-02, PNorm = 103.1376, GNorm = 0.4962, lr_0 = 8.8632e-04
Loss = 9.2884e-02, PNorm = 103.2361, GNorm = 0.5611, lr_0 = 8.8571e-04
Loss = 8.6934e-02, PNorm = 103.3366, GNorm = 0.4463, lr_0 = 8.8510e-04
Loss = 9.2839e-02, PNorm = 103.4318, GNorm = 0.9338, lr_0 = 8.8450e-04
Loss = 9.9673e-02, PNorm = 103.5395, GNorm = 0.7777, lr_0 = 8.8389e-04
Loss = 7.8817e-02, PNorm = 103.6378, GNorm = 0.3677, lr_0 = 8.8329e-04
Loss = 7.9410e-02, PNorm = 103.7359, GNorm = 0.5197, lr_0 = 8.8268e-04
Loss = 8.6554e-02, PNorm = 103.8221, GNorm = 0.8270, lr_0 = 8.8208e-04
Loss = 8.7718e-02, PNorm = 103.9135, GNorm = 0.9407, lr_0 = 8.8147e-04
Loss = 8.6321e-02, PNorm = 104.0058, GNorm = 0.6383, lr_0 = 8.8087e-04
Loss = 1.0021e-01, PNorm = 104.0945, GNorm = 0.6199, lr_0 = 8.8026e-04
Loss = 8.8971e-02, PNorm = 104.1998, GNorm = 0.4731, lr_0 = 8.7966e-04
Loss = 8.1195e-02, PNorm = 104.2984, GNorm = 0.4763, lr_0 = 8.7906e-04
Loss = 8.6044e-02, PNorm = 104.3982, GNorm = 0.4657, lr_0 = 8.7846e-04
Loss = 7.8746e-02, PNorm = 104.5045, GNorm = 0.4441, lr_0 = 8.7785e-04
Loss = 8.6611e-02, PNorm = 104.5966, GNorm = 0.6158, lr_0 = 8.7725e-04
Loss = 9.6890e-02, PNorm = 104.6961, GNorm = 0.6804, lr_0 = 8.7665e-04
Loss = 9.0573e-02, PNorm = 104.8022, GNorm = 0.6451, lr_0 = 8.7605e-04
Loss = 9.2966e-02, PNorm = 104.9019, GNorm = 0.7414, lr_0 = 8.7545e-04
Loss = 8.9527e-02, PNorm = 105.0010, GNorm = 0.8511, lr_0 = 8.7485e-04
Loss = 8.2420e-02, PNorm = 105.1022, GNorm = 1.0543, lr_0 = 8.7425e-04
Loss = 8.7116e-02, PNorm = 105.1975, GNorm = 0.5382, lr_0 = 8.7365e-04
Loss = 1.0395e-01, PNorm = 105.2992, GNorm = 0.6671, lr_0 = 8.7306e-04
Loss = 1.0412e-01, PNorm = 105.4126, GNorm = 0.6382, lr_0 = 8.7246e-04
Loss = 1.0128e-01, PNorm = 105.5255, GNorm = 0.4016, lr_0 = 8.7186e-04
Loss = 8.8801e-02, PNorm = 105.6461, GNorm = 0.5888, lr_0 = 8.7126e-04
Loss = 9.3737e-02, PNorm = 105.7447, GNorm = 1.2578, lr_0 = 8.7067e-04
Loss = 8.8603e-02, PNorm = 105.8493, GNorm = 0.5859, lr_0 = 8.7007e-04
Loss = 8.7872e-02, PNorm = 105.9448, GNorm = 0.5897, lr_0 = 8.6947e-04
Loss = 8.7216e-02, PNorm = 106.0559, GNorm = 1.0774, lr_0 = 8.6888e-04
Loss = 1.0160e-01, PNorm = 106.1519, GNorm = 1.2137, lr_0 = 8.6828e-04
Loss = 1.0054e-01, PNorm = 106.2612, GNorm = 0.5823, lr_0 = 8.6769e-04
Loss = 1.1698e-01, PNorm = 106.3707, GNorm = 0.7516, lr_0 = 8.6709e-04
Loss = 8.5734e-02, PNorm = 106.4693, GNorm = 0.8241, lr_0 = 8.6650e-04
Loss = 1.0150e-01, PNorm = 106.5811, GNorm = 0.7680, lr_0 = 8.6590e-04
Loss = 9.5724e-02, PNorm = 106.6866, GNorm = 0.7774, lr_0 = 8.6531e-04
Loss = 8.3674e-02, PNorm = 106.8061, GNorm = 0.8690, lr_0 = 8.6472e-04
Loss = 9.6858e-02, PNorm = 106.9136, GNorm = 0.8136, lr_0 = 8.6413e-04
Loss = 9.7606e-02, PNorm = 107.0205, GNorm = 0.3895, lr_0 = 8.6353e-04
Loss = 8.7550e-02, PNorm = 107.1278, GNorm = 0.5427, lr_0 = 8.6294e-04
Loss = 9.3493e-02, PNorm = 107.2274, GNorm = 0.8377, lr_0 = 8.6235e-04
Loss = 9.8669e-02, PNorm = 107.3319, GNorm = 0.4367, lr_0 = 8.6176e-04
Loss = 1.0069e-01, PNorm = 107.4471, GNorm = 0.3785, lr_0 = 8.6117e-04
Loss = 1.0592e-01, PNorm = 107.5674, GNorm = 0.8409, lr_0 = 8.6058e-04
Loss = 8.9552e-02, PNorm = 107.6900, GNorm = 0.5518, lr_0 = 8.5999e-04
Loss = 9.6896e-02, PNorm = 107.8047, GNorm = 0.6963, lr_0 = 8.5940e-04
Loss = 9.2502e-02, PNorm = 107.9119, GNorm = 0.7862, lr_0 = 8.5881e-04
Loss = 9.2722e-02, PNorm = 108.0117, GNorm = 0.7839, lr_0 = 8.5823e-04
Loss = 8.9376e-02, PNorm = 108.1284, GNorm = 0.5317, lr_0 = 8.5764e-04
Loss = 8.8712e-02, PNorm = 108.2377, GNorm = 0.7112, lr_0 = 8.5705e-04
Loss = 1.0520e-01, PNorm = 108.3508, GNorm = 0.9462, lr_0 = 8.5646e-04
Loss = 8.9709e-02, PNorm = 108.4570, GNorm = 0.9548, lr_0 = 8.5588e-04
Loss = 9.2342e-02, PNorm = 108.5675, GNorm = 0.8508, lr_0 = 8.5529e-04
Loss = 9.9174e-02, PNorm = 108.6749, GNorm = 0.4779, lr_0 = 8.5470e-04
Loss = 1.0797e-01, PNorm = 108.7903, GNorm = 0.5528, lr_0 = 8.5412e-04
Loss = 8.8757e-02, PNorm = 108.9096, GNorm = 1.0141, lr_0 = 8.5353e-04
Loss = 8.8731e-02, PNorm = 109.0252, GNorm = 0.4556, lr_0 = 8.5295e-04
Loss = 9.8007e-02, PNorm = 109.1250, GNorm = 0.6352, lr_0 = 8.5236e-04
Loss = 9.0024e-02, PNorm = 109.2446, GNorm = 0.8201, lr_0 = 8.5178e-04
Loss = 8.5197e-02, PNorm = 109.3402, GNorm = 0.5272, lr_0 = 8.5120e-04
Loss = 9.1018e-02, PNorm = 109.4451, GNorm = 0.4134, lr_0 = 8.5061e-04
Loss = 1.0338e-01, PNorm = 109.5389, GNorm = 1.3180, lr_0 = 8.5003e-04
Loss = 1.0170e-01, PNorm = 109.6486, GNorm = 0.5766, lr_0 = 8.4945e-04
Loss = 9.3558e-02, PNorm = 109.7538, GNorm = 0.4361, lr_0 = 8.4887e-04
Loss = 1.0679e-01, PNorm = 109.8665, GNorm = 1.1066, lr_0 = 8.4828e-04
Validation mae = 0.297448
Epoch 4
Loss = 6.4748e-02, PNorm = 109.9659, GNorm = 0.8935, lr_0 = 8.4770e-04
Loss = 6.1317e-02, PNorm = 110.0476, GNorm = 0.3827, lr_0 = 8.4712e-04
Loss = 5.9791e-02, PNorm = 110.1204, GNorm = 0.8694, lr_0 = 8.4654e-04
Loss = 5.5615e-02, PNorm = 110.1849, GNorm = 0.4013, lr_0 = 8.4596e-04
Loss = 5.8935e-02, PNorm = 110.2539, GNorm = 0.7965, lr_0 = 8.4538e-04
Loss = 5.0656e-02, PNorm = 110.3177, GNorm = 0.4761, lr_0 = 8.4480e-04
Loss = 6.3902e-02, PNorm = 110.3866, GNorm = 1.0769, lr_0 = 8.4423e-04
Loss = 5.1043e-02, PNorm = 110.4547, GNorm = 0.6441, lr_0 = 8.4365e-04
Loss = 5.6888e-02, PNorm = 110.5056, GNorm = 0.5105, lr_0 = 8.4307e-04
Loss = 6.5277e-02, PNorm = 110.5775, GNorm = 1.1015, lr_0 = 8.4249e-04
Loss = 5.3591e-02, PNorm = 110.6439, GNorm = 0.4559, lr_0 = 8.4191e-04
Loss = 5.3717e-02, PNorm = 110.7113, GNorm = 0.2816, lr_0 = 8.4134e-04
Loss = 5.4033e-02, PNorm = 110.7711, GNorm = 0.3588, lr_0 = 8.4076e-04
Loss = 4.9354e-02, PNorm = 110.8351, GNorm = 0.5238, lr_0 = 8.4019e-04
Loss = 6.0874e-02, PNorm = 110.8933, GNorm = 0.3803, lr_0 = 8.3961e-04
Loss = 5.2854e-02, PNorm = 110.9683, GNorm = 0.5591, lr_0 = 8.3903e-04
Loss = 5.3044e-02, PNorm = 111.0230, GNorm = 0.7238, lr_0 = 8.3846e-04
Loss = 4.7231e-02, PNorm = 111.0937, GNorm = 0.4564, lr_0 = 8.3789e-04
Loss = 4.9129e-02, PNorm = 111.1481, GNorm = 0.4552, lr_0 = 8.3731e-04
Loss = 4.6395e-02, PNorm = 111.2103, GNorm = 0.3046, lr_0 = 8.3674e-04
Loss = 4.9822e-02, PNorm = 111.2707, GNorm = 0.4925, lr_0 = 8.3616e-04
Loss = 6.5791e-02, PNorm = 111.3412, GNorm = 1.1658, lr_0 = 8.3559e-04
Loss = 5.4087e-02, PNorm = 111.4110, GNorm = 0.6404, lr_0 = 8.3502e-04
Loss = 5.7527e-02, PNorm = 111.4895, GNorm = 0.4317, lr_0 = 8.3445e-04
Loss = 5.7299e-02, PNorm = 111.5616, GNorm = 0.6411, lr_0 = 8.3388e-04
Loss = 4.8656e-02, PNorm = 111.6340, GNorm = 0.5728, lr_0 = 8.3330e-04
Loss = 5.4686e-02, PNorm = 111.7017, GNorm = 0.4014, lr_0 = 8.3273e-04
Loss = 5.2127e-02, PNorm = 111.7669, GNorm = 0.4953, lr_0 = 8.3216e-04
Loss = 5.8571e-02, PNorm = 111.8247, GNorm = 0.5644, lr_0 = 8.3159e-04
Loss = 4.9869e-02, PNorm = 111.8917, GNorm = 0.7417, lr_0 = 8.3102e-04
Loss = 4.8756e-02, PNorm = 111.9565, GNorm = 0.7113, lr_0 = 8.3045e-04
Loss = 5.5340e-02, PNorm = 112.0251, GNorm = 0.3619, lr_0 = 8.2988e-04
Loss = 5.6293e-02, PNorm = 112.0864, GNorm = 0.5444, lr_0 = 8.2932e-04
Loss = 4.9670e-02, PNorm = 112.1532, GNorm = 0.3700, lr_0 = 8.2875e-04
Loss = 5.7263e-02, PNorm = 112.2306, GNorm = 0.2433, lr_0 = 8.2818e-04
Loss = 4.9537e-02, PNorm = 112.3031, GNorm = 0.6341, lr_0 = 8.2761e-04
Loss = 4.8620e-02, PNorm = 112.3739, GNorm = 0.4958, lr_0 = 8.2705e-04
Loss = 4.9547e-02, PNorm = 112.4395, GNorm = 0.6701, lr_0 = 8.2648e-04
Loss = 5.9019e-02, PNorm = 112.5035, GNorm = 0.7019, lr_0 = 8.2591e-04
Loss = 6.3961e-02, PNorm = 112.5722, GNorm = 0.3949, lr_0 = 8.2535e-04
Loss = 6.3861e-02, PNorm = 112.6589, GNorm = 0.5881, lr_0 = 8.2478e-04
Loss = 5.8130e-02, PNorm = 112.7344, GNorm = 0.9125, lr_0 = 8.2422e-04
Loss = 5.9245e-02, PNorm = 112.8122, GNorm = 0.6961, lr_0 = 8.2365e-04
Loss = 5.7732e-02, PNorm = 112.8932, GNorm = 0.6448, lr_0 = 8.2309e-04
Loss = 5.6682e-02, PNorm = 112.9680, GNorm = 0.3679, lr_0 = 8.2252e-04
Loss = 4.8254e-02, PNorm = 113.0418, GNorm = 0.3173, lr_0 = 8.2196e-04
Loss = 5.7593e-02, PNorm = 113.1144, GNorm = 0.4980, lr_0 = 8.2140e-04
Loss = 6.0586e-02, PNorm = 113.1764, GNorm = 0.5086, lr_0 = 8.2084e-04
Loss = 5.3587e-02, PNorm = 113.2533, GNorm = 0.3379, lr_0 = 8.2027e-04
Loss = 5.2455e-02, PNorm = 113.3248, GNorm = 0.7277, lr_0 = 8.1971e-04
Loss = 5.6916e-02, PNorm = 113.3920, GNorm = 0.5707, lr_0 = 8.1915e-04
Loss = 6.3054e-02, PNorm = 113.4681, GNorm = 0.4193, lr_0 = 8.1859e-04
Loss = 5.8188e-02, PNorm = 113.5495, GNorm = 0.5221, lr_0 = 8.1803e-04
Loss = 5.3023e-02, PNorm = 113.6451, GNorm = 0.7565, lr_0 = 8.1747e-04
Loss = 6.3335e-02, PNorm = 113.7352, GNorm = 0.8623, lr_0 = 8.1691e-04
Loss = 6.2982e-02, PNorm = 113.8290, GNorm = 0.3351, lr_0 = 8.1635e-04
Loss = 6.2397e-02, PNorm = 113.9027, GNorm = 0.8299, lr_0 = 8.1579e-04
Loss = 6.0449e-02, PNorm = 113.9906, GNorm = 0.3824, lr_0 = 8.1523e-04
Loss = 6.0253e-02, PNorm = 114.0679, GNorm = 0.5404, lr_0 = 8.1467e-04
Loss = 5.8203e-02, PNorm = 114.1510, GNorm = 0.4503, lr_0 = 8.1411e-04
Loss = 5.4425e-02, PNorm = 114.2214, GNorm = 0.7272, lr_0 = 8.1355e-04
Loss = 6.0901e-02, PNorm = 114.3018, GNorm = 0.6216, lr_0 = 8.1300e-04
Loss = 5.5523e-02, PNorm = 114.3750, GNorm = 0.3283, lr_0 = 8.1244e-04
Loss = 6.4912e-02, PNorm = 114.4662, GNorm = 1.3361, lr_0 = 8.1188e-04
Loss = 7.0906e-02, PNorm = 114.5506, GNorm = 0.6159, lr_0 = 8.1133e-04
Loss = 5.8573e-02, PNorm = 114.6481, GNorm = 0.4133, lr_0 = 8.1077e-04
Loss = 5.9239e-02, PNorm = 114.7375, GNorm = 0.4123, lr_0 = 8.1022e-04
Loss = 5.9257e-02, PNorm = 114.8280, GNorm = 0.3680, lr_0 = 8.0966e-04
Loss = 5.9402e-02, PNorm = 114.9099, GNorm = 0.4706, lr_0 = 8.0911e-04
Loss = 6.7806e-02, PNorm = 114.9886, GNorm = 0.4021, lr_0 = 8.0855e-04
Loss = 6.3867e-02, PNorm = 115.0723, GNorm = 0.3813, lr_0 = 8.0800e-04
Loss = 5.9396e-02, PNorm = 115.1611, GNorm = 0.5941, lr_0 = 8.0745e-04
Loss = 6.4492e-02, PNorm = 115.2534, GNorm = 0.4350, lr_0 = 8.0689e-04
Loss = 6.0464e-02, PNorm = 115.3485, GNorm = 0.3701, lr_0 = 8.0634e-04
Loss = 6.0083e-02, PNorm = 115.4468, GNorm = 0.5707, lr_0 = 8.0579e-04
Loss = 5.8228e-02, PNorm = 115.5331, GNorm = 0.5177, lr_0 = 8.0523e-04
Loss = 6.2594e-02, PNorm = 115.6254, GNorm = 0.3763, lr_0 = 8.0468e-04
Loss = 6.9354e-02, PNorm = 115.7124, GNorm = 0.5717, lr_0 = 8.0413e-04
Loss = 5.6038e-02, PNorm = 115.7998, GNorm = 1.3039, lr_0 = 8.0358e-04
Loss = 7.8472e-02, PNorm = 115.8876, GNorm = 0.9051, lr_0 = 8.0303e-04
Loss = 6.7395e-02, PNorm = 115.9819, GNorm = 0.7728, lr_0 = 8.0248e-04
Loss = 6.5597e-02, PNorm = 116.0790, GNorm = 0.3834, lr_0 = 8.0193e-04
Loss = 6.3438e-02, PNorm = 116.1776, GNorm = 0.7596, lr_0 = 8.0138e-04
Loss = 6.5634e-02, PNorm = 116.2670, GNorm = 0.5699, lr_0 = 8.0083e-04
Loss = 6.2253e-02, PNorm = 116.3577, GNorm = 0.4721, lr_0 = 8.0028e-04
Loss = 6.2393e-02, PNorm = 116.4487, GNorm = 0.7265, lr_0 = 7.9974e-04
Loss = 6.1693e-02, PNorm = 116.5331, GNorm = 0.5441, lr_0 = 7.9919e-04
Loss = 6.5183e-02, PNorm = 116.6150, GNorm = 0.6588, lr_0 = 7.9864e-04
Loss = 6.6233e-02, PNorm = 116.7095, GNorm = 0.4717, lr_0 = 7.9809e-04
Loss = 6.3947e-02, PNorm = 116.8039, GNorm = 0.5890, lr_0 = 7.9755e-04
Loss = 5.9473e-02, PNorm = 116.8954, GNorm = 0.4098, lr_0 = 7.9700e-04
Loss = 6.6875e-02, PNorm = 116.9885, GNorm = 1.0322, lr_0 = 7.9645e-04
Loss = 7.2760e-02, PNorm = 117.0793, GNorm = 0.6262, lr_0 = 7.9591e-04
Loss = 5.7313e-02, PNorm = 117.1867, GNorm = 0.5379, lr_0 = 7.9536e-04
Loss = 6.2339e-02, PNorm = 117.2783, GNorm = 0.5461, lr_0 = 7.9482e-04
Loss = 6.8181e-02, PNorm = 117.3730, GNorm = 0.7693, lr_0 = 7.9427e-04
Loss = 6.9706e-02, PNorm = 117.4727, GNorm = 0.4481, lr_0 = 7.9373e-04
Loss = 6.6612e-02, PNorm = 117.5724, GNorm = 1.0481, lr_0 = 7.9319e-04
Loss = 6.9349e-02, PNorm = 117.6659, GNorm = 0.3538, lr_0 = 7.9264e-04
Loss = 7.7606e-02, PNorm = 117.7658, GNorm = 1.0256, lr_0 = 7.9210e-04
Loss = 7.3734e-02, PNorm = 117.8687, GNorm = 0.7693, lr_0 = 7.9156e-04
Loss = 7.1366e-02, PNorm = 117.9755, GNorm = 0.5071, lr_0 = 7.9101e-04
Loss = 7.2144e-02, PNorm = 118.0715, GNorm = 0.5065, lr_0 = 7.9047e-04
Loss = 7.1242e-02, PNorm = 118.1534, GNorm = 0.3418, lr_0 = 7.8993e-04
Loss = 7.1918e-02, PNorm = 118.2461, GNorm = 0.5349, lr_0 = 7.8939e-04
Loss = 6.2408e-02, PNorm = 118.3383, GNorm = 0.4689, lr_0 = 7.8885e-04
Loss = 6.9353e-02, PNorm = 118.4323, GNorm = 1.2162, lr_0 = 7.8831e-04
Loss = 6.6050e-02, PNorm = 118.5369, GNorm = 0.8242, lr_0 = 7.8777e-04
Loss = 6.7634e-02, PNorm = 118.6284, GNorm = 0.7991, lr_0 = 7.8723e-04
Loss = 6.6294e-02, PNorm = 118.7182, GNorm = 0.4944, lr_0 = 7.8669e-04
Loss = 6.7148e-02, PNorm = 118.8045, GNorm = 0.7710, lr_0 = 7.8615e-04
Loss = 6.8965e-02, PNorm = 118.8966, GNorm = 0.4704, lr_0 = 7.8561e-04
Loss = 6.9523e-02, PNorm = 118.9926, GNorm = 0.5791, lr_0 = 7.8507e-04
Loss = 7.5473e-02, PNorm = 119.0859, GNorm = 0.8062, lr_0 = 7.8454e-04
Loss = 6.5435e-02, PNorm = 119.1918, GNorm = 0.8132, lr_0 = 7.8400e-04
Loss = 7.6604e-02, PNorm = 119.2914, GNorm = 0.6399, lr_0 = 7.8346e-04
Loss = 6.8517e-02, PNorm = 119.3941, GNorm = 0.5976, lr_0 = 7.8293e-04
Loss = 6.5458e-02, PNorm = 119.4900, GNorm = 0.3100, lr_0 = 7.8239e-04
Loss = 6.5509e-02, PNorm = 119.5847, GNorm = 0.7225, lr_0 = 7.8185e-04
Loss = 7.1313e-02, PNorm = 119.6823, GNorm = 0.5530, lr_0 = 7.8132e-04
Validation mae = 0.289069
Epoch 5
Loss = 4.9984e-02, PNorm = 119.7656, GNorm = 0.3572, lr_0 = 7.8078e-04
Loss = 3.8065e-02, PNorm = 119.8401, GNorm = 0.3400, lr_0 = 7.8025e-04
Loss = 4.3267e-02, PNorm = 119.9046, GNorm = 0.2551, lr_0 = 7.7971e-04
Loss = 4.2320e-02, PNorm = 119.9665, GNorm = 0.3157, lr_0 = 7.7918e-04
Loss = 3.7731e-02, PNorm = 120.0297, GNorm = 0.2217, lr_0 = 7.7864e-04
Loss = 4.1959e-02, PNorm = 120.0756, GNorm = 0.5391, lr_0 = 7.7811e-04
Loss = 4.0301e-02, PNorm = 120.1361, GNorm = 0.5164, lr_0 = 7.7758e-04
Loss = 4.7669e-02, PNorm = 120.1922, GNorm = 0.5479, lr_0 = 7.7705e-04
Loss = 4.2325e-02, PNorm = 120.2437, GNorm = 0.8043, lr_0 = 7.7651e-04
Loss = 4.1589e-02, PNorm = 120.3072, GNorm = 0.2596, lr_0 = 7.7598e-04
Loss = 4.8764e-02, PNorm = 120.3582, GNorm = 0.4697, lr_0 = 7.7545e-04
Loss = 4.3242e-02, PNorm = 120.4188, GNorm = 0.5621, lr_0 = 7.7492e-04
Loss = 4.7425e-02, PNorm = 120.4695, GNorm = 0.9332, lr_0 = 7.7439e-04
Loss = 4.1760e-02, PNorm = 120.5390, GNorm = 0.2670, lr_0 = 7.7386e-04
Loss = 4.6047e-02, PNorm = 120.5993, GNorm = 0.7098, lr_0 = 7.7333e-04
Loss = 4.1427e-02, PNorm = 120.6570, GNorm = 0.6423, lr_0 = 7.7280e-04
Loss = 4.7572e-02, PNorm = 120.7166, GNorm = 0.5118, lr_0 = 7.7227e-04
Loss = 4.2854e-02, PNorm = 120.7749, GNorm = 0.4403, lr_0 = 7.7174e-04
Loss = 4.1102e-02, PNorm = 120.8419, GNorm = 0.3451, lr_0 = 7.7121e-04
Loss = 4.4689e-02, PNorm = 120.8975, GNorm = 1.0185, lr_0 = 7.7068e-04
Loss = 4.1465e-02, PNorm = 120.9567, GNorm = 0.8523, lr_0 = 7.7015e-04
Loss = 4.8050e-02, PNorm = 121.0200, GNorm = 0.9690, lr_0 = 7.6963e-04
Loss = 4.2624e-02, PNorm = 121.0735, GNorm = 0.3924, lr_0 = 7.6910e-04
Loss = 3.7899e-02, PNorm = 121.1371, GNorm = 0.6452, lr_0 = 7.6857e-04
Loss = 4.4180e-02, PNorm = 121.1984, GNorm = 0.2178, lr_0 = 7.6805e-04
Loss = 4.1132e-02, PNorm = 121.2686, GNorm = 0.4605, lr_0 = 7.6752e-04
Loss = 3.6783e-02, PNorm = 121.3229, GNorm = 0.6215, lr_0 = 7.6699e-04
Loss = 3.4924e-02, PNorm = 121.3719, GNorm = 0.4853, lr_0 = 7.6647e-04
Loss = 4.0140e-02, PNorm = 121.4235, GNorm = 0.5251, lr_0 = 7.6594e-04
Loss = 4.6882e-02, PNorm = 121.4801, GNorm = 0.7570, lr_0 = 7.6542e-04
Loss = 4.4618e-02, PNorm = 121.5408, GNorm = 0.3068, lr_0 = 7.6489e-04
Loss = 4.0217e-02, PNorm = 121.6081, GNorm = 0.4439, lr_0 = 7.6437e-04
Loss = 3.5532e-02, PNorm = 121.6747, GNorm = 0.9115, lr_0 = 7.6385e-04
Loss = 4.6694e-02, PNorm = 121.7332, GNorm = 0.4392, lr_0 = 7.6332e-04
Loss = 4.4555e-02, PNorm = 121.8041, GNorm = 0.8015, lr_0 = 7.6280e-04
Loss = 3.9760e-02, PNorm = 121.8706, GNorm = 0.6605, lr_0 = 7.6228e-04
Loss = 3.5415e-02, PNorm = 121.9312, GNorm = 0.3331, lr_0 = 7.6176e-04
Loss = 4.1499e-02, PNorm = 121.9937, GNorm = 0.5032, lr_0 = 7.6123e-04
Loss = 4.3935e-02, PNorm = 122.0507, GNorm = 0.3501, lr_0 = 7.6071e-04
Loss = 4.5734e-02, PNorm = 122.1235, GNorm = 0.3008, lr_0 = 7.6019e-04
Loss = 4.0549e-02, PNorm = 122.1864, GNorm = 0.7720, lr_0 = 7.5967e-04
Loss = 3.8177e-02, PNorm = 122.2441, GNorm = 0.2713, lr_0 = 7.5915e-04
Loss = 3.9188e-02, PNorm = 122.3085, GNorm = 0.3066, lr_0 = 7.5863e-04
Loss = 4.0943e-02, PNorm = 122.3624, GNorm = 0.2635, lr_0 = 7.5811e-04
Loss = 4.7315e-02, PNorm = 122.4282, GNorm = 0.7033, lr_0 = 7.5759e-04
Loss = 4.3549e-02, PNorm = 122.4965, GNorm = 0.6324, lr_0 = 7.5707e-04
Loss = 3.9163e-02, PNorm = 122.5675, GNorm = 0.2704, lr_0 = 7.5655e-04
Loss = 3.8674e-02, PNorm = 122.6295, GNorm = 0.4760, lr_0 = 7.5603e-04
Loss = 4.0870e-02, PNorm = 122.6904, GNorm = 0.4326, lr_0 = 7.5552e-04
Loss = 4.1912e-02, PNorm = 122.7515, GNorm = 0.3917, lr_0 = 7.5500e-04
Loss = 3.5682e-02, PNorm = 122.8139, GNorm = 0.6860, lr_0 = 7.5448e-04
Loss = 3.9702e-02, PNorm = 122.8774, GNorm = 0.7377, lr_0 = 7.5397e-04
Loss = 4.0696e-02, PNorm = 122.9364, GNorm = 0.4791, lr_0 = 7.5345e-04
Loss = 4.6793e-02, PNorm = 122.9985, GNorm = 0.5367, lr_0 = 7.5293e-04
Loss = 4.5684e-02, PNorm = 123.0625, GNorm = 0.5691, lr_0 = 7.5242e-04
Loss = 4.5512e-02, PNorm = 123.1344, GNorm = 0.5185, lr_0 = 7.5190e-04
Loss = 4.2385e-02, PNorm = 123.2043, GNorm = 0.2668, lr_0 = 7.5139e-04
Loss = 4.4366e-02, PNorm = 123.2656, GNorm = 0.4883, lr_0 = 7.5087e-04
Loss = 4.0602e-02, PNorm = 123.3326, GNorm = 0.5071, lr_0 = 7.5036e-04
Loss = 4.2612e-02, PNorm = 123.4032, GNorm = 0.5296, lr_0 = 7.4984e-04
Loss = 4.6690e-02, PNorm = 123.4686, GNorm = 0.6951, lr_0 = 7.4933e-04
Loss = 4.5997e-02, PNorm = 123.5380, GNorm = 0.7203, lr_0 = 7.4882e-04
Loss = 4.0044e-02, PNorm = 123.6145, GNorm = 0.2782, lr_0 = 7.4830e-04
Loss = 3.7197e-02, PNorm = 123.6891, GNorm = 0.4751, lr_0 = 7.4779e-04
Loss = 4.1316e-02, PNorm = 123.7641, GNorm = 0.4599, lr_0 = 7.4728e-04
Loss = 3.7310e-02, PNorm = 123.8320, GNorm = 0.4995, lr_0 = 7.4677e-04
Loss = 4.9052e-02, PNorm = 123.9034, GNorm = 1.1176, lr_0 = 7.4625e-04
Loss = 3.9718e-02, PNorm = 123.9731, GNorm = 0.4703, lr_0 = 7.4574e-04
Loss = 3.9412e-02, PNorm = 124.0421, GNorm = 0.3867, lr_0 = 7.4523e-04
Loss = 4.0510e-02, PNorm = 124.1017, GNorm = 0.2926, lr_0 = 7.4472e-04
Loss = 3.5930e-02, PNorm = 124.1640, GNorm = 0.5492, lr_0 = 7.4421e-04
Loss = 4.5532e-02, PNorm = 124.2330, GNorm = 0.4731, lr_0 = 7.4370e-04
Loss = 5.5819e-02, PNorm = 124.3022, GNorm = 0.8009, lr_0 = 7.4319e-04
Loss = 4.3903e-02, PNorm = 124.3827, GNorm = 0.7464, lr_0 = 7.4268e-04
Loss = 4.1913e-02, PNorm = 124.4574, GNorm = 0.4145, lr_0 = 7.4217e-04
Loss = 4.2342e-02, PNorm = 124.5209, GNorm = 0.2162, lr_0 = 7.4167e-04
Loss = 4.8814e-02, PNorm = 124.5920, GNorm = 0.3643, lr_0 = 7.4116e-04
Loss = 4.5104e-02, PNorm = 124.6667, GNorm = 0.5552, lr_0 = 7.4065e-04
Loss = 4.5426e-02, PNorm = 124.7380, GNorm = 0.6294, lr_0 = 7.4014e-04
Loss = 4.5477e-02, PNorm = 124.8176, GNorm = 0.2706, lr_0 = 7.3964e-04
Loss = 3.9599e-02, PNorm = 124.8972, GNorm = 0.4615, lr_0 = 7.3913e-04
Loss = 5.1892e-02, PNorm = 124.9697, GNorm = 0.3043, lr_0 = 7.3862e-04
Loss = 4.3838e-02, PNorm = 125.0504, GNorm = 0.7755, lr_0 = 7.3812e-04
Loss = 4.5242e-02, PNorm = 125.1274, GNorm = 0.3801, lr_0 = 7.3761e-04
Loss = 3.9573e-02, PNorm = 125.2075, GNorm = 0.3919, lr_0 = 7.3711e-04
Loss = 4.2372e-02, PNorm = 125.2834, GNorm = 0.2722, lr_0 = 7.3660e-04
Loss = 4.2290e-02, PNorm = 125.3561, GNorm = 0.3447, lr_0 = 7.3610e-04
Loss = 3.5687e-02, PNorm = 125.4297, GNorm = 0.3204, lr_0 = 7.3559e-04
Loss = 4.9495e-02, PNorm = 125.5015, GNorm = 0.5404, lr_0 = 7.3509e-04
Loss = 5.1775e-02, PNorm = 125.5851, GNorm = 0.3080, lr_0 = 7.3458e-04
Loss = 3.9918e-02, PNorm = 125.6598, GNorm = 0.3350, lr_0 = 7.3408e-04
Loss = 3.9281e-02, PNorm = 125.7371, GNorm = 0.2809, lr_0 = 7.3358e-04
Loss = 4.1103e-02, PNorm = 125.8078, GNorm = 0.8391, lr_0 = 7.3308e-04
Loss = 4.0940e-02, PNorm = 125.8852, GNorm = 0.5186, lr_0 = 7.3257e-04
Loss = 5.4868e-02, PNorm = 125.9648, GNorm = 0.3161, lr_0 = 7.3207e-04
Loss = 5.2949e-02, PNorm = 126.0504, GNorm = 0.9847, lr_0 = 7.3157e-04
Loss = 4.7585e-02, PNorm = 126.1345, GNorm = 0.6179, lr_0 = 7.3107e-04
Loss = 4.4945e-02, PNorm = 126.2225, GNorm = 0.7990, lr_0 = 7.3057e-04
Loss = 5.1411e-02, PNorm = 126.3176, GNorm = 0.3955, lr_0 = 7.3007e-04
Loss = 4.6927e-02, PNorm = 126.3931, GNorm = 0.7592, lr_0 = 7.2957e-04
Loss = 5.0902e-02, PNorm = 126.4769, GNorm = 0.3951, lr_0 = 7.2907e-04
Loss = 4.4714e-02, PNorm = 126.5613, GNorm = 0.2326, lr_0 = 7.2857e-04
Loss = 4.0560e-02, PNorm = 126.6416, GNorm = 0.5548, lr_0 = 7.2807e-04
Loss = 4.3956e-02, PNorm = 126.7177, GNorm = 0.5251, lr_0 = 7.2757e-04
Loss = 4.4770e-02, PNorm = 126.7895, GNorm = 0.4748, lr_0 = 7.2707e-04
Loss = 4.4070e-02, PNorm = 126.8740, GNorm = 0.6094, lr_0 = 7.2657e-04
Loss = 4.4500e-02, PNorm = 126.9483, GNorm = 0.4831, lr_0 = 7.2608e-04
Loss = 5.0019e-02, PNorm = 127.0251, GNorm = 0.5992, lr_0 = 7.2558e-04
Loss = 4.3251e-02, PNorm = 127.1093, GNorm = 0.3891, lr_0 = 7.2508e-04
Loss = 4.2810e-02, PNorm = 127.1860, GNorm = 0.4997, lr_0 = 7.2458e-04
Loss = 5.0259e-02, PNorm = 127.2593, GNorm = 0.6065, lr_0 = 7.2409e-04
Loss = 5.0831e-02, PNorm = 127.3490, GNorm = 0.5430, lr_0 = 7.2359e-04
Loss = 4.3193e-02, PNorm = 127.4316, GNorm = 0.5372, lr_0 = 7.2310e-04
Loss = 4.8239e-02, PNorm = 127.5165, GNorm = 0.3541, lr_0 = 7.2260e-04
Loss = 4.9454e-02, PNorm = 127.6019, GNorm = 0.3334, lr_0 = 7.2211e-04
Loss = 4.7133e-02, PNorm = 127.6858, GNorm = 0.7836, lr_0 = 7.2161e-04
Loss = 5.4798e-02, PNorm = 127.7660, GNorm = 0.3465, lr_0 = 7.2112e-04
Loss = 4.4024e-02, PNorm = 127.8500, GNorm = 0.3125, lr_0 = 7.2062e-04
Loss = 5.5413e-02, PNorm = 127.9342, GNorm = 0.6632, lr_0 = 7.2013e-04
Loss = 4.9402e-02, PNorm = 128.0208, GNorm = 0.8418, lr_0 = 7.1964e-04
Validation mae = 0.287538
Epoch 6
Loss = 4.2545e-02, PNorm = 128.0936, GNorm = 0.4356, lr_0 = 7.1914e-04
Loss = 3.2591e-02, PNorm = 128.1553, GNorm = 0.2697, lr_0 = 7.1865e-04
Loss = 3.2702e-02, PNorm = 128.2067, GNorm = 0.4349, lr_0 = 7.1816e-04
Loss = 3.3344e-02, PNorm = 128.2624, GNorm = 0.2506, lr_0 = 7.1767e-04
Loss = 2.8574e-02, PNorm = 128.3053, GNorm = 0.4398, lr_0 = 7.1717e-04
Loss = 3.3077e-02, PNorm = 128.3430, GNorm = 0.4509, lr_0 = 7.1668e-04
Loss = 3.1285e-02, PNorm = 128.3895, GNorm = 0.7354, lr_0 = 7.1619e-04
Loss = 3.5866e-02, PNorm = 128.4341, GNorm = 0.4390, lr_0 = 7.1570e-04
Loss = 3.9828e-02, PNorm = 128.4847, GNorm = 1.0363, lr_0 = 7.1521e-04
Loss = 4.0538e-02, PNorm = 128.5332, GNorm = 0.4302, lr_0 = 7.1472e-04
Loss = 3.0393e-02, PNorm = 128.5873, GNorm = 0.7699, lr_0 = 7.1423e-04
Loss = 2.8038e-02, PNorm = 128.6367, GNorm = 0.2609, lr_0 = 7.1374e-04
Loss = 3.5475e-02, PNorm = 128.6859, GNorm = 0.3629, lr_0 = 7.1325e-04
Loss = 3.2985e-02, PNorm = 128.7315, GNorm = 0.5643, lr_0 = 7.1277e-04
Loss = 3.0389e-02, PNorm = 128.7831, GNorm = 0.4987, lr_0 = 7.1228e-04
Loss = 3.7716e-02, PNorm = 128.8389, GNorm = 0.6398, lr_0 = 7.1179e-04
Loss = 2.6833e-02, PNorm = 128.8938, GNorm = 0.4805, lr_0 = 7.1130e-04
Loss = 3.1163e-02, PNorm = 128.9438, GNorm = 0.5391, lr_0 = 7.1081e-04
Loss = 2.9152e-02, PNorm = 128.9872, GNorm = 0.5968, lr_0 = 7.1033e-04
Loss = 3.2150e-02, PNorm = 129.0330, GNorm = 0.3154, lr_0 = 7.0984e-04
Loss = 3.2048e-02, PNorm = 129.0805, GNorm = 0.2076, lr_0 = 7.0935e-04
Loss = 3.3222e-02, PNorm = 129.1364, GNorm = 0.9324, lr_0 = 7.0887e-04
Loss = 3.0060e-02, PNorm = 129.1811, GNorm = 0.6243, lr_0 = 7.0838e-04
Loss = 3.8185e-02, PNorm = 129.2314, GNorm = 0.7054, lr_0 = 7.0790e-04
Loss = 3.1932e-02, PNorm = 129.2899, GNorm = 0.8130, lr_0 = 7.0741e-04
Loss = 3.2933e-02, PNorm = 129.3447, GNorm = 0.3362, lr_0 = 7.0693e-04
Loss = 2.8909e-02, PNorm = 129.4061, GNorm = 0.3304, lr_0 = 7.0644e-04
Loss = 4.0368e-02, PNorm = 129.4610, GNorm = 0.3140, lr_0 = 7.0596e-04
Loss = 3.5297e-02, PNorm = 129.5182, GNorm = 0.3201, lr_0 = 7.0548e-04
Loss = 2.9725e-02, PNorm = 129.5766, GNorm = 0.5042, lr_0 = 7.0499e-04
Loss = 3.1488e-02, PNorm = 129.6236, GNorm = 0.6169, lr_0 = 7.0451e-04
Loss = 3.3016e-02, PNorm = 129.6771, GNorm = 0.3421, lr_0 = 7.0403e-04
Loss = 3.6229e-02, PNorm = 129.7310, GNorm = 0.6595, lr_0 = 7.0354e-04
Loss = 3.2435e-02, PNorm = 129.7893, GNorm = 0.4572, lr_0 = 7.0306e-04
Loss = 3.4354e-02, PNorm = 129.8490, GNorm = 0.7504, lr_0 = 7.0258e-04
Loss = 3.1480e-02, PNorm = 129.9064, GNorm = 0.8820, lr_0 = 7.0210e-04
Loss = 3.3369e-02, PNorm = 129.9640, GNorm = 0.4946, lr_0 = 7.0162e-04
Loss = 3.3720e-02, PNorm = 130.0191, GNorm = 0.2245, lr_0 = 7.0114e-04
Loss = 2.9662e-02, PNorm = 130.0767, GNorm = 0.2378, lr_0 = 7.0066e-04
Loss = 3.0465e-02, PNorm = 130.1306, GNorm = 0.2993, lr_0 = 7.0018e-04
Loss = 2.9802e-02, PNorm = 130.1838, GNorm = 0.3761, lr_0 = 6.9970e-04
Loss = 3.1159e-02, PNorm = 130.2361, GNorm = 0.2033, lr_0 = 6.9922e-04
Loss = 3.3708e-02, PNorm = 130.2947, GNorm = 0.3234, lr_0 = 6.9874e-04
Loss = 3.3782e-02, PNorm = 130.3497, GNorm = 0.2417, lr_0 = 6.9826e-04
Loss = 3.2853e-02, PNorm = 130.4081, GNorm = 0.3763, lr_0 = 6.9778e-04
Loss = 3.3813e-02, PNorm = 130.4693, GNorm = 0.2501, lr_0 = 6.9730e-04
Loss = 3.3122e-02, PNorm = 130.5313, GNorm = 0.3084, lr_0 = 6.9683e-04
Loss = 3.2444e-02, PNorm = 130.5906, GNorm = 0.2868, lr_0 = 6.9635e-04
Loss = 3.3576e-02, PNorm = 130.6410, GNorm = 0.6221, lr_0 = 6.9587e-04
Loss = 3.2366e-02, PNorm = 130.6951, GNorm = 0.8298, lr_0 = 6.9540e-04
Loss = 3.5066e-02, PNorm = 130.7539, GNorm = 0.3721, lr_0 = 6.9492e-04
Loss = 3.3384e-02, PNorm = 130.8114, GNorm = 1.0228, lr_0 = 6.9444e-04
Loss = 3.1589e-02, PNorm = 130.8694, GNorm = 0.1964, lr_0 = 6.9397e-04
Loss = 3.1767e-02, PNorm = 130.9324, GNorm = 0.3950, lr_0 = 6.9349e-04
Loss = 3.6957e-02, PNorm = 130.9852, GNorm = 0.5552, lr_0 = 6.9302e-04
Loss = 3.0628e-02, PNorm = 131.0458, GNorm = 0.4390, lr_0 = 6.9254e-04
Loss = 3.1846e-02, PNorm = 131.0974, GNorm = 0.2916, lr_0 = 6.9207e-04
Loss = 4.0527e-02, PNorm = 131.1596, GNorm = 0.5513, lr_0 = 6.9159e-04
Loss = 3.5344e-02, PNorm = 131.2156, GNorm = 1.4628, lr_0 = 6.9112e-04
Loss = 3.3745e-02, PNorm = 131.2792, GNorm = 0.3968, lr_0 = 6.9065e-04
Loss = 2.8801e-02, PNorm = 131.3436, GNorm = 0.3974, lr_0 = 6.9017e-04
Loss = 2.7716e-02, PNorm = 131.4012, GNorm = 0.4786, lr_0 = 6.8970e-04
Loss = 3.4437e-02, PNorm = 131.4577, GNorm = 0.5238, lr_0 = 6.8923e-04
Loss = 3.2378e-02, PNorm = 131.5153, GNorm = 0.2702, lr_0 = 6.8876e-04
Loss = 3.9997e-02, PNorm = 131.5657, GNorm = 0.6657, lr_0 = 6.8828e-04
Loss = 3.4123e-02, PNorm = 131.6297, GNorm = 0.3577, lr_0 = 6.8781e-04
Loss = 3.8096e-02, PNorm = 131.6922, GNorm = 0.4975, lr_0 = 6.8734e-04
Loss = 3.2751e-02, PNorm = 131.7566, GNorm = 0.5449, lr_0 = 6.8687e-04
Loss = 3.6459e-02, PNorm = 131.8174, GNorm = 0.3186, lr_0 = 6.8640e-04
Loss = 2.7304e-02, PNorm = 131.8827, GNorm = 0.2312, lr_0 = 6.8593e-04
Loss = 3.2628e-02, PNorm = 131.9459, GNorm = 0.5997, lr_0 = 6.8546e-04
Loss = 3.7326e-02, PNorm = 132.0101, GNorm = 0.3439, lr_0 = 6.8499e-04
Loss = 3.3415e-02, PNorm = 132.0714, GNorm = 0.2969, lr_0 = 6.8452e-04
Loss = 2.9579e-02, PNorm = 132.1290, GNorm = 0.5511, lr_0 = 6.8405e-04
Loss = 3.1585e-02, PNorm = 132.1902, GNorm = 0.3188, lr_0 = 6.8358e-04
Loss = 3.5857e-02, PNorm = 132.2450, GNorm = 0.3959, lr_0 = 6.8312e-04
Loss = 3.3584e-02, PNorm = 132.3077, GNorm = 0.2767, lr_0 = 6.8265e-04
Loss = 3.3697e-02, PNorm = 132.3682, GNorm = 0.4390, lr_0 = 6.8218e-04
Loss = 3.7273e-02, PNorm = 132.4333, GNorm = 0.5591, lr_0 = 6.8171e-04
Loss = 4.0283e-02, PNorm = 132.5016, GNorm = 0.4830, lr_0 = 6.8125e-04
Loss = 3.5522e-02, PNorm = 132.5693, GNorm = 0.6382, lr_0 = 6.8078e-04
Loss = 3.4754e-02, PNorm = 132.6288, GNorm = 0.4057, lr_0 = 6.8031e-04
Loss = 3.2444e-02, PNorm = 132.6919, GNorm = 0.8259, lr_0 = 6.7985e-04
Loss = 3.1276e-02, PNorm = 132.7488, GNorm = 0.2624, lr_0 = 6.7938e-04
Loss = 3.9218e-02, PNorm = 132.8188, GNorm = 0.5304, lr_0 = 6.7892e-04
Loss = 3.5692e-02, PNorm = 132.8824, GNorm = 0.2781, lr_0 = 6.7845e-04
Loss = 3.7712e-02, PNorm = 132.9529, GNorm = 0.6724, lr_0 = 6.7799e-04
Loss = 3.5320e-02, PNorm = 133.0112, GNorm = 0.8676, lr_0 = 6.7752e-04
Loss = 3.3432e-02, PNorm = 133.0740, GNorm = 0.4586, lr_0 = 6.7706e-04
Loss = 3.4151e-02, PNorm = 133.1385, GNorm = 0.5617, lr_0 = 6.7659e-04
Loss = 3.4458e-02, PNorm = 133.1977, GNorm = 0.5056, lr_0 = 6.7613e-04
Loss = 3.4871e-02, PNorm = 133.2675, GNorm = 0.3059, lr_0 = 6.7567e-04
Loss = 3.6514e-02, PNorm = 133.3337, GNorm = 0.7395, lr_0 = 6.7520e-04
Loss = 4.0684e-02, PNorm = 133.4007, GNorm = 0.2107, lr_0 = 6.7474e-04
Loss = 3.8054e-02, PNorm = 133.4685, GNorm = 1.0230, lr_0 = 6.7428e-04
Loss = 3.2501e-02, PNorm = 133.5355, GNorm = 0.2666, lr_0 = 6.7382e-04
Loss = 3.1425e-02, PNorm = 133.5983, GNorm = 0.2646, lr_0 = 6.7335e-04
Loss = 3.3718e-02, PNorm = 133.6592, GNorm = 0.4756, lr_0 = 6.7289e-04
Loss = 3.4205e-02, PNorm = 133.7190, GNorm = 0.2205, lr_0 = 6.7243e-04
Loss = 3.2604e-02, PNorm = 133.7834, GNorm = 0.4392, lr_0 = 6.7197e-04
Loss = 3.1355e-02, PNorm = 133.8507, GNorm = 0.4501, lr_0 = 6.7151e-04
Loss = 3.3357e-02, PNorm = 133.9100, GNorm = 0.3078, lr_0 = 6.7105e-04
Loss = 3.3047e-02, PNorm = 133.9644, GNorm = 0.5012, lr_0 = 6.7059e-04
Loss = 3.3726e-02, PNorm = 134.0299, GNorm = 0.4993, lr_0 = 6.7013e-04
Loss = 3.8487e-02, PNorm = 134.1006, GNorm = 0.4082, lr_0 = 6.6967e-04
Loss = 3.8974e-02, PNorm = 134.1749, GNorm = 0.4222, lr_0 = 6.6921e-04
Loss = 3.4150e-02, PNorm = 134.2497, GNorm = 0.5781, lr_0 = 6.6876e-04
Loss = 3.3835e-02, PNorm = 134.3168, GNorm = 0.3237, lr_0 = 6.6830e-04
Loss = 3.6523e-02, PNorm = 134.3791, GNorm = 0.3150, lr_0 = 6.6784e-04
Loss = 3.1751e-02, PNorm = 134.4453, GNorm = 0.5816, lr_0 = 6.6738e-04
Loss = 3.1274e-02, PNorm = 134.5047, GNorm = 0.5852, lr_0 = 6.6693e-04
Loss = 3.5694e-02, PNorm = 134.5711, GNorm = 0.7695, lr_0 = 6.6647e-04
Loss = 3.1975e-02, PNorm = 134.6415, GNorm = 0.2307, lr_0 = 6.6601e-04
Loss = 3.9399e-02, PNorm = 134.7092, GNorm = 0.3893, lr_0 = 6.6556e-04
Loss = 3.5921e-02, PNorm = 134.7669, GNorm = 0.4655, lr_0 = 6.6510e-04
Loss = 4.0623e-02, PNorm = 134.8424, GNorm = 0.5789, lr_0 = 6.6464e-04
Loss = 2.9146e-02, PNorm = 134.9168, GNorm = 0.3662, lr_0 = 6.6419e-04
Loss = 3.7237e-02, PNorm = 134.9894, GNorm = 0.3269, lr_0 = 6.6373e-04
Loss = 3.0760e-02, PNorm = 135.0512, GNorm = 0.2508, lr_0 = 6.6328e-04
Loss = 4.1687e-02, PNorm = 135.1185, GNorm = 0.4453, lr_0 = 6.6282e-04
Validation mae = 0.289263
Epoch 7
Loss = 2.7627e-02, PNorm = 135.1833, GNorm = 0.6452, lr_0 = 6.6237e-04
Loss = 3.1067e-02, PNorm = 135.2385, GNorm = 0.5207, lr_0 = 6.6192e-04
Loss = 3.0391e-02, PNorm = 135.2895, GNorm = 0.2005, lr_0 = 6.6146e-04
Loss = 2.6405e-02, PNorm = 135.3353, GNorm = 0.4915, lr_0 = 6.6101e-04
Loss = 3.7099e-02, PNorm = 135.3838, GNorm = 0.2539, lr_0 = 6.6056e-04
Loss = 2.4571e-02, PNorm = 135.4322, GNorm = 0.5143, lr_0 = 6.6011e-04
Loss = 3.0765e-02, PNorm = 135.4705, GNorm = 0.5277, lr_0 = 6.5965e-04
Loss = 2.6348e-02, PNorm = 135.5147, GNorm = 0.2977, lr_0 = 6.5920e-04
Loss = 2.4335e-02, PNorm = 135.5642, GNorm = 0.4084, lr_0 = 6.5875e-04
Loss = 2.3492e-02, PNorm = 135.6087, GNorm = 0.3755, lr_0 = 6.5830e-04
Loss = 2.8530e-02, PNorm = 135.6550, GNorm = 0.2090, lr_0 = 6.5785e-04
Loss = 3.1848e-02, PNorm = 135.7030, GNorm = 0.3681, lr_0 = 6.5740e-04
Loss = 2.5696e-02, PNorm = 135.7550, GNorm = 0.4371, lr_0 = 6.5695e-04
Loss = 2.8907e-02, PNorm = 135.8081, GNorm = 1.1920, lr_0 = 6.5650e-04
Loss = 2.8497e-02, PNorm = 135.8574, GNorm = 0.9513, lr_0 = 6.5605e-04
Loss = 2.5034e-02, PNorm = 135.9041, GNorm = 0.5116, lr_0 = 6.5560e-04
Loss = 2.2657e-02, PNorm = 135.9487, GNorm = 0.2955, lr_0 = 6.5515e-04
Loss = 2.9961e-02, PNorm = 135.9942, GNorm = 0.8119, lr_0 = 6.5470e-04
Loss = 2.5167e-02, PNorm = 136.0396, GNorm = 0.4954, lr_0 = 6.5425e-04
Loss = 3.2005e-02, PNorm = 136.0869, GNorm = 0.3392, lr_0 = 6.5380e-04
Loss = 3.0963e-02, PNorm = 136.1371, GNorm = 0.4933, lr_0 = 6.5335e-04
Loss = 2.7121e-02, PNorm = 136.1905, GNorm = 0.2340, lr_0 = 6.5291e-04
Loss = 2.6439e-02, PNorm = 136.2370, GNorm = 0.3764, lr_0 = 6.5246e-04
Loss = 2.6711e-02, PNorm = 136.2848, GNorm = 0.2908, lr_0 = 6.5201e-04
Loss = 2.8341e-02, PNorm = 136.3314, GNorm = 1.3551, lr_0 = 6.5157e-04
Loss = 3.3222e-02, PNorm = 136.3752, GNorm = 0.4997, lr_0 = 6.5112e-04
Loss = 2.3346e-02, PNorm = 136.4208, GNorm = 0.2529, lr_0 = 6.5067e-04
Loss = 2.6277e-02, PNorm = 136.4669, GNorm = 0.3423, lr_0 = 6.5023e-04
Loss = 3.1160e-02, PNorm = 136.5146, GNorm = 0.5161, lr_0 = 6.4978e-04
Loss = 3.0914e-02, PNorm = 136.5651, GNorm = 0.5309, lr_0 = 6.4934e-04
Loss = 3.5812e-02, PNorm = 136.6240, GNorm = 0.4025, lr_0 = 6.4889e-04
Loss = 2.3244e-02, PNorm = 136.6794, GNorm = 0.2264, lr_0 = 6.4845e-04
Loss = 2.6634e-02, PNorm = 136.7303, GNorm = 0.3596, lr_0 = 6.4800e-04
Loss = 2.4877e-02, PNorm = 136.7777, GNorm = 0.4682, lr_0 = 6.4756e-04
Loss = 2.6895e-02, PNorm = 136.8308, GNorm = 0.4215, lr_0 = 6.4712e-04
Loss = 2.3089e-02, PNorm = 136.8755, GNorm = 0.2389, lr_0 = 6.4667e-04
Loss = 2.9819e-02, PNorm = 136.9226, GNorm = 0.3173, lr_0 = 6.4623e-04
Loss = 2.5479e-02, PNorm = 136.9664, GNorm = 0.1952, lr_0 = 6.4579e-04
Loss = 2.5305e-02, PNorm = 137.0178, GNorm = 0.6439, lr_0 = 6.4534e-04
Loss = 2.7998e-02, PNorm = 137.0664, GNorm = 0.8522, lr_0 = 6.4490e-04
Loss = 2.5481e-02, PNorm = 137.1205, GNorm = 0.5998, lr_0 = 6.4446e-04
Loss = 2.2697e-02, PNorm = 137.1675, GNorm = 0.2032, lr_0 = 6.4402e-04
Loss = 2.5699e-02, PNorm = 137.2167, GNorm = 0.3441, lr_0 = 6.4358e-04
Loss = 2.3814e-02, PNorm = 137.2632, GNorm = 0.2800, lr_0 = 6.4314e-04
Loss = 2.4908e-02, PNorm = 137.3127, GNorm = 0.6102, lr_0 = 6.4270e-04
Loss = 2.5236e-02, PNorm = 137.3594, GNorm = 0.1929, lr_0 = 6.4226e-04
Loss = 2.2614e-02, PNorm = 137.4083, GNorm = 0.4951, lr_0 = 6.4182e-04
Loss = 2.7347e-02, PNorm = 137.4639, GNorm = 0.3517, lr_0 = 6.4138e-04
Loss = 2.9491e-02, PNorm = 137.5207, GNorm = 0.3445, lr_0 = 6.4094e-04
Loss = 2.8756e-02, PNorm = 137.5775, GNorm = 0.8263, lr_0 = 6.4050e-04
Loss = 2.8534e-02, PNorm = 137.6324, GNorm = 0.2433, lr_0 = 6.4006e-04
Loss = 2.6234e-02, PNorm = 137.6825, GNorm = 0.6987, lr_0 = 6.3962e-04
Loss = 2.8989e-02, PNorm = 137.7328, GNorm = 0.9641, lr_0 = 6.3918e-04
Loss = 2.6818e-02, PNorm = 137.7925, GNorm = 0.6994, lr_0 = 6.3874e-04
Loss = 2.6322e-02, PNorm = 137.8405, GNorm = 0.6399, lr_0 = 6.3831e-04
Loss = 2.6499e-02, PNorm = 137.8928, GNorm = 0.3074, lr_0 = 6.3787e-04
Loss = 2.3394e-02, PNorm = 137.9477, GNorm = 0.2173, lr_0 = 6.3743e-04
Loss = 2.8922e-02, PNorm = 137.9960, GNorm = 0.2107, lr_0 = 6.3700e-04
Loss = 3.2076e-02, PNorm = 138.0531, GNorm = 0.6341, lr_0 = 6.3656e-04
Loss = 3.0694e-02, PNorm = 138.1087, GNorm = 0.3132, lr_0 = 6.3612e-04
Loss = 2.3787e-02, PNorm = 138.1647, GNorm = 0.2184, lr_0 = 6.3569e-04
Loss = 2.6835e-02, PNorm = 138.2167, GNorm = 0.7043, lr_0 = 6.3525e-04
Loss = 2.9563e-02, PNorm = 138.2709, GNorm = 0.3427, lr_0 = 6.3482e-04
Loss = 2.7615e-02, PNorm = 138.3236, GNorm = 0.3344, lr_0 = 6.3438e-04
Loss = 2.7020e-02, PNorm = 138.3782, GNorm = 0.2477, lr_0 = 6.3395e-04
Loss = 2.6081e-02, PNorm = 138.4255, GNorm = 1.3277, lr_0 = 6.3351e-04
Loss = 2.6677e-02, PNorm = 138.4760, GNorm = 0.4361, lr_0 = 6.3308e-04
Loss = 2.4364e-02, PNorm = 138.5298, GNorm = 0.5395, lr_0 = 6.3265e-04
Loss = 2.6605e-02, PNorm = 138.5809, GNorm = 0.4691, lr_0 = 6.3221e-04
Loss = 2.4591e-02, PNorm = 138.6337, GNorm = 0.2292, lr_0 = 6.3178e-04
Loss = 2.4352e-02, PNorm = 138.6851, GNorm = 0.4279, lr_0 = 6.3135e-04
Loss = 2.7285e-02, PNorm = 138.7343, GNorm = 0.3481, lr_0 = 6.3091e-04
Loss = 2.8537e-02, PNorm = 138.7856, GNorm = 0.4416, lr_0 = 6.3048e-04
Loss = 2.1756e-02, PNorm = 138.8410, GNorm = 0.5998, lr_0 = 6.3005e-04
Loss = 2.4904e-02, PNorm = 138.8949, GNorm = 0.3781, lr_0 = 6.2962e-04
Loss = 2.9045e-02, PNorm = 138.9539, GNorm = 0.2431, lr_0 = 6.2919e-04
Loss = 2.5899e-02, PNorm = 139.0124, GNorm = 0.3239, lr_0 = 6.2876e-04
Loss = 2.7094e-02, PNorm = 139.0728, GNorm = 0.4231, lr_0 = 6.2833e-04
Loss = 2.5937e-02, PNorm = 139.1292, GNorm = 0.3458, lr_0 = 6.2789e-04
Loss = 2.8006e-02, PNorm = 139.1840, GNorm = 0.9359, lr_0 = 6.2746e-04
Loss = 2.5771e-02, PNorm = 139.2327, GNorm = 0.4056, lr_0 = 6.2703e-04
Loss = 2.8161e-02, PNorm = 139.2866, GNorm = 0.2057, lr_0 = 6.2661e-04
Loss = 2.4840e-02, PNorm = 139.3411, GNorm = 0.7899, lr_0 = 6.2618e-04
Loss = 2.6657e-02, PNorm = 139.3867, GNorm = 0.3835, lr_0 = 6.2575e-04
Loss = 2.7226e-02, PNorm = 139.4418, GNorm = 0.5198, lr_0 = 6.2532e-04
Loss = 3.1668e-02, PNorm = 139.5011, GNorm = 0.4322, lr_0 = 6.2489e-04
Loss = 2.4930e-02, PNorm = 139.5602, GNorm = 0.1431, lr_0 = 6.2446e-04
Loss = 2.4138e-02, PNorm = 139.6136, GNorm = 0.8123, lr_0 = 6.2403e-04
Loss = 2.6276e-02, PNorm = 139.6637, GNorm = 0.1770, lr_0 = 6.2361e-04
Loss = 2.7584e-02, PNorm = 139.7151, GNorm = 0.2139, lr_0 = 6.2318e-04
Loss = 2.9045e-02, PNorm = 139.7681, GNorm = 0.4449, lr_0 = 6.2275e-04
Loss = 2.3857e-02, PNorm = 139.8190, GNorm = 0.2727, lr_0 = 6.2233e-04
Loss = 2.9515e-02, PNorm = 139.8660, GNorm = 0.6349, lr_0 = 6.2190e-04
Loss = 2.9539e-02, PNorm = 139.9189, GNorm = 0.7572, lr_0 = 6.2147e-04
Loss = 2.9647e-02, PNorm = 139.9778, GNorm = 0.2993, lr_0 = 6.2105e-04
Loss = 2.3703e-02, PNorm = 140.0359, GNorm = 0.3246, lr_0 = 6.2062e-04
Loss = 3.1515e-02, PNorm = 140.0907, GNorm = 0.5327, lr_0 = 6.2020e-04
Loss = 2.6944e-02, PNorm = 140.1468, GNorm = 0.3363, lr_0 = 6.1977e-04
Loss = 2.6631e-02, PNorm = 140.2027, GNorm = 0.2874, lr_0 = 6.1935e-04
Loss = 2.6238e-02, PNorm = 140.2552, GNorm = 0.9051, lr_0 = 6.1892e-04
Loss = 2.8782e-02, PNorm = 140.3181, GNorm = 0.5019, lr_0 = 6.1850e-04
Loss = 3.0330e-02, PNorm = 140.3741, GNorm = 0.4504, lr_0 = 6.1808e-04
Loss = 2.6591e-02, PNorm = 140.4345, GNorm = 0.2886, lr_0 = 6.1765e-04
Loss = 2.7544e-02, PNorm = 140.4865, GNorm = 0.2445, lr_0 = 6.1723e-04
Loss = 3.1287e-02, PNorm = 140.5416, GNorm = 0.4857, lr_0 = 6.1681e-04
Loss = 2.8406e-02, PNorm = 140.6021, GNorm = 0.2189, lr_0 = 6.1638e-04
Loss = 2.5010e-02, PNorm = 140.6589, GNorm = 0.3034, lr_0 = 6.1596e-04
Loss = 2.8628e-02, PNorm = 140.7099, GNorm = 0.5026, lr_0 = 6.1554e-04
Loss = 2.8015e-02, PNorm = 140.7669, GNorm = 0.5182, lr_0 = 6.1512e-04
Loss = 2.6495e-02, PNorm = 140.8215, GNorm = 0.3633, lr_0 = 6.1470e-04
Loss = 3.0876e-02, PNorm = 140.8833, GNorm = 0.8377, lr_0 = 6.1428e-04
Loss = 2.6173e-02, PNorm = 140.9442, GNorm = 0.2328, lr_0 = 6.1385e-04
Loss = 2.7652e-02, PNorm = 141.0100, GNorm = 0.4823, lr_0 = 6.1343e-04
Loss = 2.9399e-02, PNorm = 141.0703, GNorm = 0.3160, lr_0 = 6.1301e-04
Loss = 2.8299e-02, PNorm = 141.1191, GNorm = 0.2060, lr_0 = 6.1259e-04
Loss = 2.4184e-02, PNorm = 141.1746, GNorm = 0.4505, lr_0 = 6.1217e-04
Loss = 2.7465e-02, PNorm = 141.2223, GNorm = 0.1978, lr_0 = 6.1175e-04
Loss = 2.9499e-02, PNorm = 141.2762, GNorm = 0.2822, lr_0 = 6.1134e-04
Loss = 3.3223e-02, PNorm = 141.3312, GNorm = 0.5587, lr_0 = 6.1092e-04
Loss = 2.8979e-02, PNorm = 141.3938, GNorm = 0.5153, lr_0 = 6.1050e-04
Validation mae = 0.284328
Epoch 8
Loss = 2.4754e-02, PNorm = 141.4473, GNorm = 0.4530, lr_0 = 6.1008e-04
Loss = 2.3671e-02, PNorm = 141.4951, GNorm = 0.3389, lr_0 = 6.0966e-04
Loss = 2.4338e-02, PNorm = 141.5428, GNorm = 0.2990, lr_0 = 6.0924e-04
Loss = 2.2108e-02, PNorm = 141.5873, GNorm = 0.3529, lr_0 = 6.0883e-04
Loss = 2.2683e-02, PNorm = 141.6269, GNorm = 0.5772, lr_0 = 6.0841e-04
Loss = 2.2853e-02, PNorm = 141.6716, GNorm = 0.7496, lr_0 = 6.0799e-04
Loss = 2.0869e-02, PNorm = 141.7117, GNorm = 0.1832, lr_0 = 6.0758e-04
Loss = 2.5616e-02, PNorm = 141.7550, GNorm = 0.5070, lr_0 = 6.0716e-04
Loss = 1.9224e-02, PNorm = 141.7970, GNorm = 0.5591, lr_0 = 6.0674e-04
Loss = 2.2643e-02, PNorm = 141.8369, GNorm = 0.9863, lr_0 = 6.0633e-04
Loss = 2.3697e-02, PNorm = 141.8757, GNorm = 0.3910, lr_0 = 6.0591e-04
Loss = 2.5174e-02, PNorm = 141.9157, GNorm = 0.3564, lr_0 = 6.0550e-04
Loss = 2.4292e-02, PNorm = 141.9537, GNorm = 0.4283, lr_0 = 6.0508e-04
Loss = 2.5066e-02, PNorm = 141.9972, GNorm = 0.5531, lr_0 = 6.0467e-04
Loss = 2.3963e-02, PNorm = 142.0454, GNorm = 0.3897, lr_0 = 6.0425e-04
Loss = 2.5962e-02, PNorm = 142.0904, GNorm = 0.3405, lr_0 = 6.0384e-04
Loss = 1.9014e-02, PNorm = 142.1287, GNorm = 0.4939, lr_0 = 6.0343e-04
Loss = 1.9621e-02, PNorm = 142.1657, GNorm = 0.2646, lr_0 = 6.0301e-04
Loss = 2.0418e-02, PNorm = 142.2005, GNorm = 0.3107, lr_0 = 6.0260e-04
Loss = 1.8945e-02, PNorm = 142.2355, GNorm = 0.2365, lr_0 = 6.0219e-04
Loss = 1.8780e-02, PNorm = 142.2687, GNorm = 0.2760, lr_0 = 6.0178e-04
Loss = 2.0435e-02, PNorm = 142.3039, GNorm = 0.1650, lr_0 = 6.0136e-04
Loss = 1.7017e-02, PNorm = 142.3424, GNorm = 0.2936, lr_0 = 6.0095e-04
Loss = 1.8026e-02, PNorm = 142.3813, GNorm = 0.2912, lr_0 = 6.0054e-04
Loss = 2.0699e-02, PNorm = 142.4206, GNorm = 0.3390, lr_0 = 6.0013e-04
Loss = 2.1892e-02, PNorm = 142.4582, GNorm = 0.2634, lr_0 = 5.9972e-04
Loss = 2.1430e-02, PNorm = 142.4946, GNorm = 0.4056, lr_0 = 5.9931e-04
Loss = 2.2965e-02, PNorm = 142.5343, GNorm = 0.4700, lr_0 = 5.9890e-04
Loss = 2.2130e-02, PNorm = 142.5711, GNorm = 0.5690, lr_0 = 5.9849e-04
Loss = 1.9247e-02, PNorm = 142.6074, GNorm = 0.4431, lr_0 = 5.9808e-04
Loss = 2.1712e-02, PNorm = 142.6396, GNorm = 0.5479, lr_0 = 5.9767e-04
Loss = 1.9347e-02, PNorm = 142.6783, GNorm = 0.1879, lr_0 = 5.9726e-04
Loss = 2.1053e-02, PNorm = 142.7159, GNorm = 0.4595, lr_0 = 5.9685e-04
Loss = 2.0044e-02, PNorm = 142.7517, GNorm = 0.4702, lr_0 = 5.9644e-04
Loss = 1.8577e-02, PNorm = 142.7909, GNorm = 0.2173, lr_0 = 5.9603e-04
Loss = 2.0299e-02, PNorm = 142.8301, GNorm = 0.4756, lr_0 = 5.9562e-04
Loss = 2.2076e-02, PNorm = 142.8730, GNorm = 0.2114, lr_0 = 5.9521e-04
Loss = 1.8443e-02, PNorm = 142.9127, GNorm = 0.3631, lr_0 = 5.9481e-04
Loss = 1.8731e-02, PNorm = 142.9583, GNorm = 0.1974, lr_0 = 5.9440e-04
Loss = 2.1565e-02, PNorm = 142.9960, GNorm = 0.2615, lr_0 = 5.9399e-04
Loss = 1.8051e-02, PNorm = 143.0300, GNorm = 0.2532, lr_0 = 5.9358e-04
Loss = 2.3450e-02, PNorm = 143.0650, GNorm = 0.3389, lr_0 = 5.9318e-04
Loss = 2.0169e-02, PNorm = 143.1025, GNorm = 0.2996, lr_0 = 5.9277e-04
Loss = 2.3857e-02, PNorm = 143.1419, GNorm = 0.5240, lr_0 = 5.9236e-04
Loss = 1.7825e-02, PNorm = 143.1778, GNorm = 0.5331, lr_0 = 5.9196e-04
Loss = 2.3259e-02, PNorm = 143.2197, GNorm = 0.3117, lr_0 = 5.9155e-04
Loss = 1.9323e-02, PNorm = 143.2582, GNorm = 0.2682, lr_0 = 5.9115e-04
Loss = 1.9270e-02, PNorm = 143.2940, GNorm = 0.7320, lr_0 = 5.9074e-04
Loss = 2.2839e-02, PNorm = 143.3306, GNorm = 0.4015, lr_0 = 5.9034e-04
Loss = 1.9724e-02, PNorm = 143.3753, GNorm = 0.3658, lr_0 = 5.8993e-04
Loss = 2.6115e-02, PNorm = 143.4193, GNorm = 0.2189, lr_0 = 5.8953e-04
Loss = 2.0050e-02, PNorm = 143.4600, GNorm = 0.3931, lr_0 = 5.8913e-04
Loss = 1.9731e-02, PNorm = 143.5008, GNorm = 0.1731, lr_0 = 5.8872e-04
Loss = 2.2130e-02, PNorm = 143.5383, GNorm = 0.1615, lr_0 = 5.8832e-04
Loss = 1.9248e-02, PNorm = 143.5817, GNorm = 0.3032, lr_0 = 5.8792e-04
Loss = 2.1503e-02, PNorm = 143.6233, GNorm = 0.3020, lr_0 = 5.8751e-04
Loss = 2.2598e-02, PNorm = 143.6666, GNorm = 0.4360, lr_0 = 5.8711e-04
Loss = 2.1956e-02, PNorm = 143.7094, GNorm = 0.4744, lr_0 = 5.8671e-04
Loss = 2.2431e-02, PNorm = 143.7482, GNorm = 0.9685, lr_0 = 5.8631e-04
Loss = 2.1658e-02, PNorm = 143.7944, GNorm = 0.5545, lr_0 = 5.8591e-04
Loss = 2.2614e-02, PNorm = 143.8423, GNorm = 0.7309, lr_0 = 5.8550e-04
Loss = 2.5545e-02, PNorm = 143.8914, GNorm = 0.2706, lr_0 = 5.8510e-04
Loss = 2.0201e-02, PNorm = 143.9298, GNorm = 0.2494, lr_0 = 5.8470e-04
Loss = 2.1820e-02, PNorm = 143.9684, GNorm = 0.3125, lr_0 = 5.8430e-04
Loss = 2.0024e-02, PNorm = 144.0103, GNorm = 0.4564, lr_0 = 5.8390e-04
Loss = 2.3640e-02, PNorm = 144.0577, GNorm = 0.2317, lr_0 = 5.8350e-04
Loss = 1.8817e-02, PNorm = 144.1001, GNorm = 0.6313, lr_0 = 5.8310e-04
Loss = 1.9237e-02, PNorm = 144.1365, GNorm = 0.3718, lr_0 = 5.8270e-04
Loss = 1.9405e-02, PNorm = 144.1710, GNorm = 0.3635, lr_0 = 5.8230e-04
Loss = 1.6930e-02, PNorm = 144.2083, GNorm = 0.2524, lr_0 = 5.8190e-04
Loss = 2.0694e-02, PNorm = 144.2481, GNorm = 0.4808, lr_0 = 5.8151e-04
Loss = 1.8172e-02, PNorm = 144.2859, GNorm = 0.3994, lr_0 = 5.8111e-04
Loss = 1.9788e-02, PNorm = 144.3294, GNorm = 0.4921, lr_0 = 5.8071e-04
Loss = 2.0930e-02, PNorm = 144.3729, GNorm = 0.4723, lr_0 = 5.8031e-04
Loss = 1.7947e-02, PNorm = 144.4160, GNorm = 0.5095, lr_0 = 5.7991e-04
Loss = 2.0083e-02, PNorm = 144.4560, GNorm = 0.5262, lr_0 = 5.7952e-04
Loss = 2.0334e-02, PNorm = 144.4945, GNorm = 0.1415, lr_0 = 5.7912e-04
Loss = 2.2790e-02, PNorm = 144.5350, GNorm = 0.2416, lr_0 = 5.7872e-04
Loss = 2.5420e-02, PNorm = 144.5758, GNorm = 0.3689, lr_0 = 5.7833e-04
Loss = 2.1110e-02, PNorm = 144.6179, GNorm = 0.7034, lr_0 = 5.7793e-04
Loss = 2.3430e-02, PNorm = 144.6653, GNorm = 0.2862, lr_0 = 5.7753e-04
Loss = 2.0718e-02, PNorm = 144.7188, GNorm = 0.6708, lr_0 = 5.7714e-04
Loss = 2.1778e-02, PNorm = 144.7670, GNorm = 0.4397, lr_0 = 5.7674e-04
Loss = 2.1698e-02, PNorm = 144.8092, GNorm = 0.2661, lr_0 = 5.7635e-04
Loss = 1.7870e-02, PNorm = 144.8531, GNorm = 0.2365, lr_0 = 5.7595e-04
Loss = 2.5805e-02, PNorm = 144.8992, GNorm = 0.6986, lr_0 = 5.7556e-04
Loss = 2.1699e-02, PNorm = 144.9535, GNorm = 0.4601, lr_0 = 5.7516e-04
Loss = 2.0181e-02, PNorm = 144.9994, GNorm = 0.3039, lr_0 = 5.7477e-04
Loss = 1.7670e-02, PNorm = 145.0419, GNorm = 0.2902, lr_0 = 5.7438e-04
Loss = 2.0126e-02, PNorm = 145.0849, GNorm = 0.2266, lr_0 = 5.7398e-04
Loss = 2.2920e-02, PNorm = 145.1313, GNorm = 0.3499, lr_0 = 5.7359e-04
Loss = 2.2048e-02, PNorm = 145.1786, GNorm = 0.6292, lr_0 = 5.7320e-04
Loss = 1.9944e-02, PNorm = 145.2189, GNorm = 0.1694, lr_0 = 5.7280e-04
Loss = 2.3418e-02, PNorm = 145.2652, GNorm = 0.7516, lr_0 = 5.7241e-04
Loss = 2.1192e-02, PNorm = 145.3150, GNorm = 0.1592, lr_0 = 5.7202e-04
Loss = 2.0101e-02, PNorm = 145.3605, GNorm = 0.2310, lr_0 = 5.7163e-04
Loss = 1.8050e-02, PNorm = 145.4029, GNorm = 0.1919, lr_0 = 5.7124e-04
Loss = 2.2260e-02, PNorm = 145.4450, GNorm = 0.3282, lr_0 = 5.7084e-04
Loss = 2.2847e-02, PNorm = 145.4850, GNorm = 0.2566, lr_0 = 5.7045e-04
Loss = 2.3848e-02, PNorm = 145.5324, GNorm = 0.3301, lr_0 = 5.7006e-04
Loss = 2.1571e-02, PNorm = 145.5805, GNorm = 0.5315, lr_0 = 5.6967e-04
Loss = 2.5733e-02, PNorm = 145.6347, GNorm = 0.6916, lr_0 = 5.6928e-04
Loss = 2.1297e-02, PNorm = 145.6908, GNorm = 0.1519, lr_0 = 5.6889e-04
Loss = 1.9400e-02, PNorm = 145.7441, GNorm = 0.3477, lr_0 = 5.6850e-04
Loss = 2.4160e-02, PNorm = 145.7954, GNorm = 0.3751, lr_0 = 5.6811e-04
Loss = 1.9110e-02, PNorm = 145.8405, GNorm = 0.2006, lr_0 = 5.6772e-04
Loss = 2.3663e-02, PNorm = 145.8823, GNorm = 0.2321, lr_0 = 5.6733e-04
Loss = 2.0491e-02, PNorm = 145.9323, GNorm = 0.2305, lr_0 = 5.6695e-04
Loss = 1.8956e-02, PNorm = 145.9782, GNorm = 0.6453, lr_0 = 5.6656e-04
Loss = 2.4186e-02, PNorm = 146.0280, GNorm = 0.4037, lr_0 = 5.6617e-04
Loss = 2.5671e-02, PNorm = 146.0773, GNorm = 0.7603, lr_0 = 5.6578e-04
Loss = 2.3793e-02, PNorm = 146.1326, GNorm = 0.3522, lr_0 = 5.6539e-04
Loss = 2.1142e-02, PNorm = 146.1851, GNorm = 0.5754, lr_0 = 5.6501e-04
Loss = 2.1226e-02, PNorm = 146.2360, GNorm = 0.2513, lr_0 = 5.6462e-04
Loss = 2.3783e-02, PNorm = 146.2877, GNorm = 0.3705, lr_0 = 5.6423e-04
Loss = 2.1399e-02, PNorm = 146.3392, GNorm = 0.1809, lr_0 = 5.6385e-04
Loss = 2.5088e-02, PNorm = 146.3903, GNorm = 0.7658, lr_0 = 5.6346e-04
Loss = 2.4208e-02, PNorm = 146.4376, GNorm = 0.2127, lr_0 = 5.6307e-04
Loss = 2.4991e-02, PNorm = 146.4920, GNorm = 0.2258, lr_0 = 5.6269e-04
Loss = 2.3125e-02, PNorm = 146.5438, GNorm = 0.2937, lr_0 = 5.6230e-04
Validation mae = 0.285036
Epoch 9
Loss = 2.0540e-02, PNorm = 146.5871, GNorm = 0.4718, lr_0 = 5.6192e-04
Loss = 2.2811e-02, PNorm = 146.6204, GNorm = 0.3456, lr_0 = 5.6153e-04
Loss = 1.8381e-02, PNorm = 146.6564, GNorm = 0.3591, lr_0 = 5.6115e-04
Loss = 1.7213e-02, PNorm = 146.6875, GNorm = 0.4471, lr_0 = 5.6076e-04
Loss = 1.5713e-02, PNorm = 146.7192, GNorm = 0.2657, lr_0 = 5.6038e-04
Loss = 2.0525e-02, PNorm = 146.7540, GNorm = 0.3210, lr_0 = 5.6000e-04
Loss = 1.9022e-02, PNorm = 146.7892, GNorm = 0.3166, lr_0 = 5.5961e-04
Loss = 1.9305e-02, PNorm = 146.8247, GNorm = 0.9960, lr_0 = 5.5923e-04
Loss = 2.0053e-02, PNorm = 146.8612, GNorm = 0.3295, lr_0 = 5.5885e-04
Loss = 1.8271e-02, PNorm = 146.8974, GNorm = 0.1794, lr_0 = 5.5846e-04
Loss = 2.0010e-02, PNorm = 146.9347, GNorm = 0.2734, lr_0 = 5.5808e-04
Loss = 1.8166e-02, PNorm = 146.9675, GNorm = 0.4271, lr_0 = 5.5770e-04
Loss = 1.6571e-02, PNorm = 146.9997, GNorm = 0.3318, lr_0 = 5.5732e-04
Loss = 1.6391e-02, PNorm = 147.0247, GNorm = 0.1552, lr_0 = 5.5693e-04
Loss = 1.8454e-02, PNorm = 147.0505, GNorm = 0.2041, lr_0 = 5.5655e-04
Loss = 1.9180e-02, PNorm = 147.0809, GNorm = 0.3283, lr_0 = 5.5617e-04
Loss = 1.7370e-02, PNorm = 147.1201, GNorm = 0.6108, lr_0 = 5.5579e-04
Loss = 1.7918e-02, PNorm = 147.1550, GNorm = 0.2279, lr_0 = 5.5541e-04
Loss = 1.4743e-02, PNorm = 147.1884, GNorm = 0.3539, lr_0 = 5.5503e-04
Loss = 1.7833e-02, PNorm = 147.2215, GNorm = 0.4826, lr_0 = 5.5465e-04
Loss = 1.5312e-02, PNorm = 147.2567, GNorm = 0.1768, lr_0 = 5.5427e-04
Loss = 1.8421e-02, PNorm = 147.2905, GNorm = 0.1878, lr_0 = 5.5389e-04
Loss = 2.0545e-02, PNorm = 147.3252, GNorm = 0.3939, lr_0 = 5.5351e-04
Loss = 1.5975e-02, PNorm = 147.3596, GNorm = 0.1508, lr_0 = 5.5313e-04
Loss = 1.8115e-02, PNorm = 147.3865, GNorm = 0.5824, lr_0 = 5.5275e-04
Loss = 1.7173e-02, PNorm = 147.4094, GNorm = 0.3961, lr_0 = 5.5237e-04
Loss = 1.8168e-02, PNorm = 147.4412, GNorm = 0.1946, lr_0 = 5.5199e-04
Loss = 2.2281e-02, PNorm = 147.4720, GNorm = 0.4465, lr_0 = 5.5162e-04
Loss = 1.8800e-02, PNorm = 147.5081, GNorm = 0.2555, lr_0 = 5.5124e-04
Loss = 1.9169e-02, PNorm = 147.5442, GNorm = 0.5346, lr_0 = 5.5086e-04
Loss = 1.9333e-02, PNorm = 147.5825, GNorm = 0.5104, lr_0 = 5.5048e-04
Loss = 1.5702e-02, PNorm = 147.6200, GNorm = 0.1536, lr_0 = 5.5011e-04
Loss = 1.5775e-02, PNorm = 147.6558, GNorm = 0.2671, lr_0 = 5.4973e-04
Loss = 1.7378e-02, PNorm = 147.6918, GNorm = 0.2075, lr_0 = 5.4935e-04
Loss = 1.5466e-02, PNorm = 147.7268, GNorm = 0.5434, lr_0 = 5.4898e-04
Loss = 1.6349e-02, PNorm = 147.7558, GNorm = 0.1363, lr_0 = 5.4860e-04
Loss = 1.6218e-02, PNorm = 147.7928, GNorm = 0.2674, lr_0 = 5.4822e-04
Loss = 1.7251e-02, PNorm = 147.8244, GNorm = 0.3373, lr_0 = 5.4785e-04
Loss = 1.7965e-02, PNorm = 147.8543, GNorm = 0.3254, lr_0 = 5.4747e-04
Loss = 1.9304e-02, PNorm = 147.8908, GNorm = 0.1474, lr_0 = 5.4710e-04
Loss = 1.5138e-02, PNorm = 147.9286, GNorm = 0.3907, lr_0 = 5.4672e-04
Loss = 1.6046e-02, PNorm = 147.9672, GNorm = 0.5861, lr_0 = 5.4635e-04
Loss = 1.7205e-02, PNorm = 148.0034, GNorm = 0.6389, lr_0 = 5.4597e-04
Loss = 1.9674e-02, PNorm = 148.0370, GNorm = 0.1805, lr_0 = 5.4560e-04
Loss = 1.5706e-02, PNorm = 148.0700, GNorm = 0.3055, lr_0 = 5.4523e-04
Loss = 1.5054e-02, PNorm = 148.1023, GNorm = 0.1758, lr_0 = 5.4485e-04
Loss = 1.6968e-02, PNorm = 148.1398, GNorm = 0.3069, lr_0 = 5.4448e-04
Loss = 1.7570e-02, PNorm = 148.1778, GNorm = 0.7701, lr_0 = 5.4411e-04
Loss = 1.4805e-02, PNorm = 148.2120, GNorm = 0.1736, lr_0 = 5.4373e-04
Loss = 1.7263e-02, PNorm = 148.2470, GNorm = 0.1651, lr_0 = 5.4336e-04
Loss = 1.4856e-02, PNorm = 148.2772, GNorm = 0.3899, lr_0 = 5.4299e-04
Loss = 1.6857e-02, PNorm = 148.3058, GNorm = 0.3534, lr_0 = 5.4262e-04
Loss = 1.8099e-02, PNorm = 148.3386, GNorm = 0.3273, lr_0 = 5.4225e-04
Loss = 1.6310e-02, PNorm = 148.3723, GNorm = 0.6528, lr_0 = 5.4187e-04
Loss = 1.6581e-02, PNorm = 148.4080, GNorm = 0.1886, lr_0 = 5.4150e-04
Loss = 1.6625e-02, PNorm = 148.4410, GNorm = 0.5752, lr_0 = 5.4113e-04
Loss = 1.4669e-02, PNorm = 148.4783, GNorm = 0.8435, lr_0 = 5.4076e-04
Loss = 1.5571e-02, PNorm = 148.5130, GNorm = 0.4703, lr_0 = 5.4039e-04
Loss = 1.6740e-02, PNorm = 148.5537, GNorm = 0.2520, lr_0 = 5.4002e-04
Loss = 2.1531e-02, PNorm = 148.5894, GNorm = 0.8826, lr_0 = 5.3965e-04
Loss = 1.8576e-02, PNorm = 148.6189, GNorm = 0.4040, lr_0 = 5.3928e-04
Loss = 1.6988e-02, PNorm = 148.6524, GNorm = 0.1632, lr_0 = 5.3891e-04
Loss = 1.7823e-02, PNorm = 148.6924, GNorm = 0.5268, lr_0 = 5.3854e-04
Loss = 2.1264e-02, PNorm = 148.7329, GNorm = 0.1812, lr_0 = 5.3817e-04
Loss = 1.5494e-02, PNorm = 148.7692, GNorm = 0.2329, lr_0 = 5.3781e-04
Loss = 1.4294e-02, PNorm = 148.8097, GNorm = 0.2229, lr_0 = 5.3744e-04
Loss = 1.8632e-02, PNorm = 148.8447, GNorm = 0.2395, lr_0 = 5.3707e-04
Loss = 1.7582e-02, PNorm = 148.8847, GNorm = 0.2039, lr_0 = 5.3670e-04
Loss = 1.8207e-02, PNorm = 148.9251, GNorm = 0.5636, lr_0 = 5.3633e-04
Loss = 1.5656e-02, PNorm = 148.9658, GNorm = 0.1704, lr_0 = 5.3597e-04
Loss = 1.7634e-02, PNorm = 149.0079, GNorm = 0.5223, lr_0 = 5.3560e-04
Loss = 1.8807e-02, PNorm = 149.0532, GNorm = 0.2474, lr_0 = 5.3523e-04
Loss = 2.1284e-02, PNorm = 149.0975, GNorm = 0.1337, lr_0 = 5.3486e-04
Loss = 2.1247e-02, PNorm = 149.1405, GNorm = 0.4343, lr_0 = 5.3450e-04
Loss = 1.8640e-02, PNorm = 149.1920, GNorm = 0.4556, lr_0 = 5.3413e-04
Loss = 1.6866e-02, PNorm = 149.2356, GNorm = 0.2882, lr_0 = 5.3377e-04
Loss = 1.4917e-02, PNorm = 149.2796, GNorm = 0.2654, lr_0 = 5.3340e-04
Loss = 1.7590e-02, PNorm = 149.3152, GNorm = 0.4277, lr_0 = 5.3304e-04
Loss = 1.6983e-02, PNorm = 149.3514, GNorm = 0.9071, lr_0 = 5.3267e-04
Loss = 2.0941e-02, PNorm = 149.3853, GNorm = 0.2005, lr_0 = 5.3231e-04
Loss = 1.6901e-02, PNorm = 149.4242, GNorm = 0.1595, lr_0 = 5.3194e-04
Loss = 1.5577e-02, PNorm = 149.4689, GNorm = 0.2101, lr_0 = 5.3158e-04
Loss = 1.9605e-02, PNorm = 149.5100, GNorm = 0.3792, lr_0 = 5.3121e-04
Loss = 1.6522e-02, PNorm = 149.5509, GNorm = 0.3701, lr_0 = 5.3085e-04
Loss = 1.7023e-02, PNorm = 149.5913, GNorm = 0.2309, lr_0 = 5.3048e-04
Loss = 1.6012e-02, PNorm = 149.6316, GNorm = 0.1970, lr_0 = 5.3012e-04
Loss = 1.8178e-02, PNorm = 149.6659, GNorm = 0.1886, lr_0 = 5.2976e-04
Loss = 1.7014e-02, PNorm = 149.7056, GNorm = 0.3622, lr_0 = 5.2939e-04
Loss = 1.7784e-02, PNorm = 149.7452, GNorm = 0.2691, lr_0 = 5.2903e-04
Loss = 1.6981e-02, PNorm = 149.7839, GNorm = 0.1923, lr_0 = 5.2867e-04
Loss = 1.7505e-02, PNorm = 149.8291, GNorm = 0.2887, lr_0 = 5.2831e-04
Loss = 1.9035e-02, PNorm = 149.8621, GNorm = 0.4738, lr_0 = 5.2795e-04
Loss = 1.9022e-02, PNorm = 149.8963, GNorm = 0.4645, lr_0 = 5.2758e-04
Loss = 1.6912e-02, PNorm = 149.9338, GNorm = 0.2668, lr_0 = 5.2722e-04
Loss = 1.7999e-02, PNorm = 149.9727, GNorm = 0.2270, lr_0 = 5.2686e-04
Loss = 1.6306e-02, PNorm = 150.0104, GNorm = 0.3410, lr_0 = 5.2650e-04
Loss = 1.6166e-02, PNorm = 150.0466, GNorm = 0.2153, lr_0 = 5.2614e-04
Loss = 1.6014e-02, PNorm = 150.0791, GNorm = 0.2712, lr_0 = 5.2578e-04
Loss = 1.5070e-02, PNorm = 150.1156, GNorm = 0.2937, lr_0 = 5.2542e-04
Loss = 1.8255e-02, PNorm = 150.1584, GNorm = 0.3477, lr_0 = 5.2506e-04
Loss = 1.7804e-02, PNorm = 150.1996, GNorm = 0.2507, lr_0 = 5.2470e-04
Loss = 1.6117e-02, PNorm = 150.2392, GNorm = 0.5282, lr_0 = 5.2434e-04
Loss = 1.7277e-02, PNorm = 150.2763, GNorm = 0.3675, lr_0 = 5.2398e-04
Loss = 2.1817e-02, PNorm = 150.3188, GNorm = 0.3719, lr_0 = 5.2362e-04
Loss = 1.7729e-02, PNorm = 150.3612, GNorm = 0.7517, lr_0 = 5.2326e-04
Loss = 1.7791e-02, PNorm = 150.4022, GNorm = 0.3821, lr_0 = 5.2290e-04
Loss = 2.0649e-02, PNorm = 150.4441, GNorm = 0.5997, lr_0 = 5.2255e-04
Loss = 1.7003e-02, PNorm = 150.4853, GNorm = 0.2057, lr_0 = 5.2219e-04
Loss = 1.7256e-02, PNorm = 150.5294, GNorm = 0.8569, lr_0 = 5.2183e-04
Loss = 1.7535e-02, PNorm = 150.5728, GNorm = 0.4761, lr_0 = 5.2147e-04
Loss = 2.1594e-02, PNorm = 150.6135, GNorm = 0.1794, lr_0 = 5.2112e-04
Loss = 1.8473e-02, PNorm = 150.6499, GNorm = 0.5922, lr_0 = 5.2076e-04
Loss = 1.7319e-02, PNorm = 150.6869, GNorm = 0.7767, lr_0 = 5.2040e-04
Loss = 1.7443e-02, PNorm = 150.7232, GNorm = 0.2089, lr_0 = 5.2005e-04
Loss = 1.7173e-02, PNorm = 150.7587, GNorm = 0.2749, lr_0 = 5.1969e-04
Loss = 1.7819e-02, PNorm = 150.7928, GNorm = 0.3601, lr_0 = 5.1933e-04
Loss = 1.7384e-02, PNorm = 150.8314, GNorm = 0.1406, lr_0 = 5.1898e-04
Loss = 2.0188e-02, PNorm = 150.8748, GNorm = 0.2853, lr_0 = 5.1862e-04
Loss = 1.7533e-02, PNorm = 150.9123, GNorm = 0.3302, lr_0 = 5.1827e-04
Loss = 1.9738e-02, PNorm = 150.9569, GNorm = 0.5836, lr_0 = 5.1791e-04
Validation mae = 0.283287
Epoch 10
Loss = 1.8114e-02, PNorm = 150.9885, GNorm = 0.4157, lr_0 = 5.1756e-04
Loss = 1.4541e-02, PNorm = 151.0214, GNorm = 0.1585, lr_0 = 5.1720e-04
Loss = 1.3506e-02, PNorm = 151.0458, GNorm = 0.2511, lr_0 = 5.1685e-04
Loss = 1.4475e-02, PNorm = 151.0674, GNorm = 0.2091, lr_0 = 5.1649e-04
Loss = 1.5634e-02, PNorm = 151.0904, GNorm = 0.4064, lr_0 = 5.1614e-04
Loss = 1.6027e-02, PNorm = 151.1190, GNorm = 0.6842, lr_0 = 5.1579e-04
Loss = 1.4363e-02, PNorm = 151.1461, GNorm = 0.4158, lr_0 = 5.1543e-04
Loss = 1.3975e-02, PNorm = 151.1732, GNorm = 0.2841, lr_0 = 5.1508e-04
Loss = 1.4179e-02, PNorm = 151.2013, GNorm = 0.5536, lr_0 = 5.1473e-04
Loss = 1.3936e-02, PNorm = 151.2333, GNorm = 0.1882, lr_0 = 5.1437e-04
Loss = 1.8259e-02, PNorm = 151.2615, GNorm = 0.6047, lr_0 = 5.1402e-04
Loss = 1.4314e-02, PNorm = 151.2861, GNorm = 0.4879, lr_0 = 5.1367e-04
Loss = 1.3051e-02, PNorm = 151.3077, GNorm = 0.4016, lr_0 = 5.1332e-04
Loss = 1.5833e-02, PNorm = 151.3300, GNorm = 0.6141, lr_0 = 5.1297e-04
Loss = 1.6702e-02, PNorm = 151.3581, GNorm = 0.4879, lr_0 = 5.1262e-04
Loss = 1.6586e-02, PNorm = 151.3888, GNorm = 0.6981, lr_0 = 5.1226e-04
Loss = 1.3465e-02, PNorm = 151.4207, GNorm = 0.2389, lr_0 = 5.1191e-04
Loss = 1.7495e-02, PNorm = 151.4501, GNorm = 0.4359, lr_0 = 5.1156e-04
Loss = 1.4289e-02, PNorm = 151.4786, GNorm = 0.7116, lr_0 = 5.1121e-04
Loss = 1.3257e-02, PNorm = 151.5100, GNorm = 0.1711, lr_0 = 5.1086e-04
Loss = 1.5586e-02, PNorm = 151.5411, GNorm = 0.3193, lr_0 = 5.1051e-04
Loss = 1.3969e-02, PNorm = 151.5797, GNorm = 0.4517, lr_0 = 5.1016e-04
Loss = 1.3070e-02, PNorm = 151.6113, GNorm = 0.1386, lr_0 = 5.0981e-04
Loss = 1.4938e-02, PNorm = 151.6329, GNorm = 0.3447, lr_0 = 5.0946e-04
Loss = 1.4487e-02, PNorm = 151.6591, GNorm = 0.1881, lr_0 = 5.0911e-04
Loss = 1.8856e-02, PNorm = 151.6834, GNorm = 0.5181, lr_0 = 5.0877e-04
Loss = 1.4844e-02, PNorm = 151.7164, GNorm = 0.6743, lr_0 = 5.0842e-04
Loss = 1.8288e-02, PNorm = 151.7451, GNorm = 0.3796, lr_0 = 5.0807e-04
Loss = 1.6718e-02, PNorm = 151.7760, GNorm = 0.4477, lr_0 = 5.0772e-04
Loss = 1.4053e-02, PNorm = 151.8005, GNorm = 0.5823, lr_0 = 5.0737e-04
Loss = 1.5917e-02, PNorm = 151.8299, GNorm = 0.6694, lr_0 = 5.0703e-04
Loss = 1.3031e-02, PNorm = 151.8612, GNorm = 0.4746, lr_0 = 5.0668e-04
Loss = 1.2093e-02, PNorm = 151.8932, GNorm = 0.1739, lr_0 = 5.0633e-04
Loss = 1.3277e-02, PNorm = 151.9241, GNorm = 0.5995, lr_0 = 5.0598e-04
Loss = 1.3773e-02, PNorm = 151.9541, GNorm = 0.2150, lr_0 = 5.0564e-04
Loss = 1.3953e-02, PNorm = 151.9823, GNorm = 0.1325, lr_0 = 5.0529e-04
Loss = 1.5170e-02, PNorm = 152.0160, GNorm = 0.3671, lr_0 = 5.0494e-04
Loss = 1.7448e-02, PNorm = 152.0452, GNorm = 0.2168, lr_0 = 5.0460e-04
Loss = 1.3773e-02, PNorm = 152.0732, GNorm = 0.3116, lr_0 = 5.0425e-04
Loss = 1.6940e-02, PNorm = 152.1034, GNorm = 0.2218, lr_0 = 5.0391e-04
Loss = 1.4910e-02, PNorm = 152.1419, GNorm = 0.6664, lr_0 = 5.0356e-04
Loss = 1.6960e-02, PNorm = 152.1761, GNorm = 0.2953, lr_0 = 5.0322e-04
Loss = 1.3356e-02, PNorm = 152.2104, GNorm = 0.3209, lr_0 = 5.0287e-04
Loss = 1.4163e-02, PNorm = 152.2409, GNorm = 0.5627, lr_0 = 5.0253e-04
Loss = 1.3504e-02, PNorm = 152.2737, GNorm = 0.3207, lr_0 = 5.0218e-04
Loss = 1.4539e-02, PNorm = 152.3009, GNorm = 0.3564, lr_0 = 5.0184e-04
Loss = 1.2252e-02, PNorm = 152.3309, GNorm = 0.3893, lr_0 = 5.0150e-04
Loss = 1.4629e-02, PNorm = 152.3601, GNorm = 0.2650, lr_0 = 5.0115e-04
Loss = 1.4167e-02, PNorm = 152.3884, GNorm = 0.2039, lr_0 = 5.0081e-04
Loss = 1.2446e-02, PNorm = 152.4151, GNorm = 0.3943, lr_0 = 5.0047e-04
Loss = 1.4595e-02, PNorm = 152.4388, GNorm = 0.2207, lr_0 = 5.0012e-04
Loss = 1.3681e-02, PNorm = 152.4659, GNorm = 0.5455, lr_0 = 4.9978e-04
Loss = 1.4817e-02, PNorm = 152.4962, GNorm = 0.6172, lr_0 = 4.9944e-04
Loss = 1.3030e-02, PNorm = 152.5288, GNorm = 0.2491, lr_0 = 4.9910e-04
Loss = 1.3495e-02, PNorm = 152.5625, GNorm = 0.2395, lr_0 = 4.9875e-04
Loss = 1.2863e-02, PNorm = 152.5921, GNorm = 0.1176, lr_0 = 4.9841e-04
Loss = 1.3915e-02, PNorm = 152.6265, GNorm = 0.2744, lr_0 = 4.9807e-04
Loss = 1.3095e-02, PNorm = 152.6557, GNorm = 0.2829, lr_0 = 4.9773e-04
Loss = 1.7384e-02, PNorm = 152.6824, GNorm = 0.3392, lr_0 = 4.9739e-04
Loss = 1.2765e-02, PNorm = 152.7176, GNorm = 0.4740, lr_0 = 4.9705e-04
Loss = 1.3590e-02, PNorm = 152.7517, GNorm = 0.4615, lr_0 = 4.9671e-04
Loss = 1.6004e-02, PNorm = 152.7829, GNorm = 0.3781, lr_0 = 4.9637e-04
Loss = 1.3725e-02, PNorm = 152.8132, GNorm = 0.1828, lr_0 = 4.9603e-04
Loss = 1.3194e-02, PNorm = 152.8441, GNorm = 0.5881, lr_0 = 4.9569e-04
Loss = 1.3996e-02, PNorm = 152.8805, GNorm = 0.4446, lr_0 = 4.9535e-04
Loss = 1.2484e-02, PNorm = 152.9080, GNorm = 0.2936, lr_0 = 4.9501e-04
Loss = 1.3703e-02, PNorm = 152.9378, GNorm = 0.4793, lr_0 = 4.9467e-04
Loss = 1.4012e-02, PNorm = 152.9652, GNorm = 0.2906, lr_0 = 4.9433e-04
Loss = 1.1514e-02, PNorm = 152.9981, GNorm = 0.1919, lr_0 = 4.9399e-04
Loss = 1.3375e-02, PNorm = 153.0236, GNorm = 0.3624, lr_0 = 4.9365e-04
Loss = 1.3950e-02, PNorm = 153.0516, GNorm = 0.4541, lr_0 = 4.9332e-04
Loss = 1.1610e-02, PNorm = 153.0835, GNorm = 0.1379, lr_0 = 4.9298e-04
Loss = 1.4007e-02, PNorm = 153.1178, GNorm = 0.8662, lr_0 = 4.9264e-04
Loss = 1.3236e-02, PNorm = 153.1516, GNorm = 0.4291, lr_0 = 4.9230e-04
Loss = 1.5188e-02, PNorm = 153.1834, GNorm = 0.2112, lr_0 = 4.9197e-04
Loss = 1.5692e-02, PNorm = 153.2146, GNorm = 0.1138, lr_0 = 4.9163e-04
Loss = 1.5872e-02, PNorm = 153.2461, GNorm = 0.5082, lr_0 = 4.9129e-04
Loss = 1.1770e-02, PNorm = 153.2829, GNorm = 0.5499, lr_0 = 4.9095e-04
Loss = 1.5097e-02, PNorm = 153.3109, GNorm = 0.6055, lr_0 = 4.9062e-04
Loss = 1.5881e-02, PNorm = 153.3406, GNorm = 0.3133, lr_0 = 4.9028e-04
Loss = 1.4221e-02, PNorm = 153.3722, GNorm = 0.2919, lr_0 = 4.8995e-04
Loss = 1.4557e-02, PNorm = 153.4067, GNorm = 0.2623, lr_0 = 4.8961e-04
Loss = 1.3490e-02, PNorm = 153.4390, GNorm = 0.2384, lr_0 = 4.8928e-04
Loss = 1.5082e-02, PNorm = 153.4734, GNorm = 0.2325, lr_0 = 4.8894e-04
Loss = 1.3304e-02, PNorm = 153.5018, GNorm = 0.4576, lr_0 = 4.8861e-04
Loss = 1.2387e-02, PNorm = 153.5327, GNorm = 0.4163, lr_0 = 4.8827e-04
Loss = 1.4346e-02, PNorm = 153.5647, GNorm = 0.3026, lr_0 = 4.8794e-04
Loss = 1.3679e-02, PNorm = 153.5989, GNorm = 0.3450, lr_0 = 4.8760e-04
Loss = 1.4852e-02, PNorm = 153.6329, GNorm = 0.3212, lr_0 = 4.8727e-04
Loss = 1.4632e-02, PNorm = 153.6713, GNorm = 0.5453, lr_0 = 4.8693e-04
Loss = 1.3788e-02, PNorm = 153.7010, GNorm = 0.3007, lr_0 = 4.8660e-04
Loss = 1.3554e-02, PNorm = 153.7331, GNorm = 0.2397, lr_0 = 4.8627e-04
Loss = 1.6819e-02, PNorm = 153.7692, GNorm = 0.5447, lr_0 = 4.8593e-04
Loss = 1.3027e-02, PNorm = 153.8062, GNorm = 0.1973, lr_0 = 4.8560e-04
Loss = 1.3122e-02, PNorm = 153.8398, GNorm = 0.3042, lr_0 = 4.8527e-04
Loss = 1.5733e-02, PNorm = 153.8773, GNorm = 0.4351, lr_0 = 4.8494e-04
Loss = 1.6753e-02, PNorm = 153.9150, GNorm = 0.2213, lr_0 = 4.8460e-04
Loss = 1.8066e-02, PNorm = 153.9514, GNorm = 0.4598, lr_0 = 4.8427e-04
Loss = 1.6559e-02, PNorm = 153.9848, GNorm = 0.4658, lr_0 = 4.8394e-04
Loss = 1.6806e-02, PNorm = 154.0194, GNorm = 0.4776, lr_0 = 4.8361e-04
Loss = 1.3162e-02, PNorm = 154.0543, GNorm = 0.4710, lr_0 = 4.8328e-04
Loss = 1.8520e-02, PNorm = 154.0880, GNorm = 0.1863, lr_0 = 4.8295e-04
Loss = 1.3213e-02, PNorm = 154.1264, GNorm = 0.3478, lr_0 = 4.8262e-04
Loss = 1.4608e-02, PNorm = 154.1588, GNorm = 0.1372, lr_0 = 4.8228e-04
Loss = 1.4090e-02, PNorm = 154.1904, GNorm = 0.9195, lr_0 = 4.8195e-04
Loss = 1.5387e-02, PNorm = 154.2182, GNorm = 0.4300, lr_0 = 4.8162e-04
Loss = 1.3194e-02, PNorm = 154.2529, GNorm = 0.2805, lr_0 = 4.8129e-04
Loss = 1.3929e-02, PNorm = 154.2841, GNorm = 0.1770, lr_0 = 4.8096e-04
Loss = 1.4419e-02, PNorm = 154.3149, GNorm = 0.1551, lr_0 = 4.8064e-04
Loss = 1.4475e-02, PNorm = 154.3505, GNorm = 0.1944, lr_0 = 4.8031e-04
Loss = 1.6402e-02, PNorm = 154.3798, GNorm = 0.2711, lr_0 = 4.7998e-04
Loss = 1.4707e-02, PNorm = 154.4143, GNorm = 0.2104, lr_0 = 4.7965e-04
Loss = 1.1105e-02, PNorm = 154.4490, GNorm = 0.2581, lr_0 = 4.7932e-04
Loss = 1.5671e-02, PNorm = 154.4809, GNorm = 0.1923, lr_0 = 4.7899e-04
Loss = 1.3386e-02, PNorm = 154.5159, GNorm = 0.1221, lr_0 = 4.7866e-04
Loss = 1.4357e-02, PNorm = 154.5513, GNorm = 0.3519, lr_0 = 4.7833e-04
Loss = 1.5643e-02, PNorm = 154.5832, GNorm = 0.5348, lr_0 = 4.7801e-04
Loss = 1.3185e-02, PNorm = 154.6178, GNorm = 0.2227, lr_0 = 4.7768e-04
Loss = 1.3419e-02, PNorm = 154.6542, GNorm = 0.4579, lr_0 = 4.7735e-04
Loss = 1.3166e-02, PNorm = 154.6895, GNorm = 0.1718, lr_0 = 4.7703e-04
Validation mae = 0.282370
Epoch 11
Loss = 1.2601e-02, PNorm = 154.7203, GNorm = 0.4230, lr_0 = 4.7670e-04
Loss = 1.4871e-02, PNorm = 154.7457, GNorm = 0.2088, lr_0 = 4.7637e-04
Loss = 1.3547e-02, PNorm = 154.7673, GNorm = 0.3018, lr_0 = 4.7605e-04
Loss = 1.7143e-02, PNorm = 154.7963, GNorm = 0.3761, lr_0 = 4.7572e-04
Loss = 1.3916e-02, PNorm = 154.8288, GNorm = 0.5232, lr_0 = 4.7539e-04
Loss = 1.2068e-02, PNorm = 154.8566, GNorm = 0.2292, lr_0 = 4.7507e-04
Loss = 1.3273e-02, PNorm = 154.8818, GNorm = 0.4867, lr_0 = 4.7474e-04
Loss = 1.3424e-02, PNorm = 154.9041, GNorm = 0.1896, lr_0 = 4.7442e-04
Loss = 1.4124e-02, PNorm = 154.9242, GNorm = 0.1664, lr_0 = 4.7409e-04
Loss = 1.1078e-02, PNorm = 154.9449, GNorm = 0.4189, lr_0 = 4.7377e-04
Loss = 1.1672e-02, PNorm = 154.9656, GNorm = 0.2051, lr_0 = 4.7344e-04
Loss = 1.3978e-02, PNorm = 154.9894, GNorm = 0.4076, lr_0 = 4.7312e-04
Loss = 1.3298e-02, PNorm = 155.0130, GNorm = 0.1719, lr_0 = 4.7279e-04
Loss = 1.3747e-02, PNorm = 155.0400, GNorm = 0.2589, lr_0 = 4.7247e-04
Loss = 1.0845e-02, PNorm = 155.0659, GNorm = 0.1060, lr_0 = 4.7215e-04
Loss = 1.5026e-02, PNorm = 155.0850, GNorm = 0.4797, lr_0 = 4.7182e-04
Loss = 1.2125e-02, PNorm = 155.1062, GNorm = 0.4739, lr_0 = 4.7150e-04
Loss = 1.2678e-02, PNorm = 155.1237, GNorm = 0.5802, lr_0 = 4.7118e-04
Loss = 1.1287e-02, PNorm = 155.1465, GNorm = 0.1904, lr_0 = 4.7085e-04
Loss = 1.1693e-02, PNorm = 155.1721, GNorm = 0.2089, lr_0 = 4.7053e-04
Loss = 1.2028e-02, PNorm = 155.1950, GNorm = 0.3289, lr_0 = 4.7021e-04
Loss = 1.1760e-02, PNorm = 155.2159, GNorm = 0.2309, lr_0 = 4.6989e-04
Loss = 1.1230e-02, PNorm = 155.2393, GNorm = 0.5990, lr_0 = 4.6957e-04
Loss = 1.0724e-02, PNorm = 155.2627, GNorm = 0.3850, lr_0 = 4.6924e-04
Loss = 1.2104e-02, PNorm = 155.2844, GNorm = 0.2226, lr_0 = 4.6892e-04
Loss = 1.1810e-02, PNorm = 155.3127, GNorm = 0.4872, lr_0 = 4.6860e-04
Loss = 1.3071e-02, PNorm = 155.3410, GNorm = 0.3488, lr_0 = 4.6828e-04
Loss = 1.1216e-02, PNorm = 155.3651, GNorm = 0.4338, lr_0 = 4.6796e-04
Loss = 1.1471e-02, PNorm = 155.3915, GNorm = 0.3998, lr_0 = 4.6764e-04
Loss = 1.3839e-02, PNorm = 155.4153, GNorm = 0.2675, lr_0 = 4.6732e-04
Loss = 1.3860e-02, PNorm = 155.4417, GNorm = 0.4343, lr_0 = 4.6700e-04
Loss = 1.2049e-02, PNorm = 155.4659, GNorm = 0.4442, lr_0 = 4.6668e-04
Loss = 1.2088e-02, PNorm = 155.4926, GNorm = 0.4478, lr_0 = 4.6636e-04
Loss = 1.1342e-02, PNorm = 155.5166, GNorm = 0.1834, lr_0 = 4.6604e-04
Loss = 1.2080e-02, PNorm = 155.5430, GNorm = 0.3689, lr_0 = 4.6572e-04
Loss = 1.0069e-02, PNorm = 155.5683, GNorm = 0.4744, lr_0 = 4.6540e-04
Loss = 1.2135e-02, PNorm = 155.5965, GNorm = 0.1658, lr_0 = 4.6508e-04
Loss = 1.3055e-02, PNorm = 155.6188, GNorm = 0.2672, lr_0 = 4.6476e-04
Loss = 1.3126e-02, PNorm = 155.6378, GNorm = 0.5189, lr_0 = 4.6445e-04
Loss = 1.1588e-02, PNorm = 155.6595, GNorm = 0.4320, lr_0 = 4.6413e-04
Loss = 1.2074e-02, PNorm = 155.6818, GNorm = 0.1790, lr_0 = 4.6381e-04
Loss = 1.3541e-02, PNorm = 155.7068, GNorm = 0.4937, lr_0 = 4.6349e-04
Loss = 1.1557e-02, PNorm = 155.7320, GNorm = 0.2799, lr_0 = 4.6317e-04
Loss = 1.3727e-02, PNorm = 155.7605, GNorm = 0.2301, lr_0 = 4.6286e-04
Loss = 1.2984e-02, PNorm = 155.7859, GNorm = 0.1881, lr_0 = 4.6254e-04
Loss = 1.1478e-02, PNorm = 155.8128, GNorm = 0.3821, lr_0 = 4.6222e-04
Loss = 1.1569e-02, PNorm = 155.8395, GNorm = 0.4632, lr_0 = 4.6191e-04
Loss = 1.3415e-02, PNorm = 155.8636, GNorm = 0.7925, lr_0 = 4.6159e-04
Loss = 1.3452e-02, PNorm = 155.8916, GNorm = 0.3909, lr_0 = 4.6127e-04
Loss = 1.4298e-02, PNorm = 155.9236, GNorm = 0.1796, lr_0 = 4.6096e-04
Loss = 1.6052e-02, PNorm = 155.9585, GNorm = 0.4344, lr_0 = 4.6064e-04
Loss = 1.1411e-02, PNorm = 155.9883, GNorm = 0.3881, lr_0 = 4.6033e-04
Loss = 1.3497e-02, PNorm = 156.0169, GNorm = 0.2371, lr_0 = 4.6001e-04
Loss = 1.1944e-02, PNorm = 156.0446, GNorm = 0.8674, lr_0 = 4.5970e-04
Loss = 1.0042e-02, PNorm = 156.0725, GNorm = 0.3482, lr_0 = 4.5938e-04
Loss = 1.1262e-02, PNorm = 156.0956, GNorm = 0.2558, lr_0 = 4.5907e-04
Loss = 1.3245e-02, PNorm = 156.1240, GNorm = 0.1289, lr_0 = 4.5875e-04
Loss = 1.4895e-02, PNorm = 156.1516, GNorm = 0.7687, lr_0 = 4.5844e-04
Loss = 1.0389e-02, PNorm = 156.1746, GNorm = 0.5729, lr_0 = 4.5812e-04
Loss = 1.0425e-02, PNorm = 156.1983, GNorm = 0.2674, lr_0 = 4.5781e-04
Loss = 9.6354e-03, PNorm = 156.2225, GNorm = 0.2053, lr_0 = 4.5750e-04
Loss = 1.1076e-02, PNorm = 156.2427, GNorm = 0.3035, lr_0 = 4.5718e-04
Loss = 1.1922e-02, PNorm = 156.2656, GNorm = 0.1606, lr_0 = 4.5687e-04
Loss = 1.0995e-02, PNorm = 156.2931, GNorm = 0.3279, lr_0 = 4.5656e-04
Loss = 1.1367e-02, PNorm = 156.3214, GNorm = 0.4053, lr_0 = 4.5624e-04
Loss = 1.2845e-02, PNorm = 156.3471, GNorm = 0.4093, lr_0 = 4.5593e-04
Loss = 1.2448e-02, PNorm = 156.3742, GNorm = 0.2930, lr_0 = 4.5562e-04
Loss = 1.1748e-02, PNorm = 156.4015, GNorm = 0.2705, lr_0 = 4.5531e-04
Loss = 1.2056e-02, PNorm = 156.4295, GNorm = 0.1684, lr_0 = 4.5499e-04
Loss = 1.2737e-02, PNorm = 156.4564, GNorm = 0.4042, lr_0 = 4.5468e-04
Loss = 1.1089e-02, PNorm = 156.4850, GNorm = 0.2210, lr_0 = 4.5437e-04
Loss = 1.2302e-02, PNorm = 156.5125, GNorm = 0.1656, lr_0 = 4.5406e-04
Loss = 1.2140e-02, PNorm = 156.5411, GNorm = 0.2660, lr_0 = 4.5375e-04
Loss = 1.2324e-02, PNorm = 156.5688, GNorm = 0.4352, lr_0 = 4.5344e-04
Loss = 1.2354e-02, PNorm = 156.5991, GNorm = 0.2225, lr_0 = 4.5313e-04
Loss = 1.1545e-02, PNorm = 156.6303, GNorm = 0.1582, lr_0 = 4.5282e-04
Loss = 1.0407e-02, PNorm = 156.6567, GNorm = 0.0972, lr_0 = 4.5251e-04
Loss = 1.1224e-02, PNorm = 156.6833, GNorm = 0.1596, lr_0 = 4.5220e-04
Loss = 1.1637e-02, PNorm = 156.7100, GNorm = 0.1791, lr_0 = 4.5189e-04
Loss = 1.1220e-02, PNorm = 156.7384, GNorm = 0.6070, lr_0 = 4.5158e-04
Loss = 1.5276e-02, PNorm = 156.7605, GNorm = 0.1226, lr_0 = 4.5127e-04
Loss = 1.0222e-02, PNorm = 156.7870, GNorm = 0.2887, lr_0 = 4.5096e-04
Loss = 1.1270e-02, PNorm = 156.8092, GNorm = 0.4265, lr_0 = 4.5065e-04
Loss = 1.1656e-02, PNorm = 156.8346, GNorm = 0.4782, lr_0 = 4.5034e-04
Loss = 1.0947e-02, PNorm = 156.8632, GNorm = 0.1362, lr_0 = 4.5003e-04
Loss = 9.4825e-03, PNorm = 156.8887, GNorm = 0.1081, lr_0 = 4.4972e-04
Loss = 1.0705e-02, PNorm = 156.9138, GNorm = 0.1792, lr_0 = 4.4942e-04
Loss = 1.0952e-02, PNorm = 156.9416, GNorm = 0.1066, lr_0 = 4.4911e-04
Loss = 1.1994e-02, PNorm = 156.9705, GNorm = 0.2878, lr_0 = 4.4880e-04
Loss = 1.1930e-02, PNorm = 156.9983, GNorm = 0.2328, lr_0 = 4.4849e-04
Loss = 1.0522e-02, PNorm = 157.0262, GNorm = 0.2665, lr_0 = 4.4819e-04
Loss = 1.0971e-02, PNorm = 157.0517, GNorm = 0.2115, lr_0 = 4.4788e-04
Loss = 1.2552e-02, PNorm = 157.0770, GNorm = 0.3721, lr_0 = 4.4757e-04
Loss = 1.5374e-02, PNorm = 157.1041, GNorm = 0.2806, lr_0 = 4.4727e-04
Loss = 1.3544e-02, PNorm = 157.1294, GNorm = 0.2970, lr_0 = 4.4696e-04
Loss = 1.0143e-02, PNorm = 157.1577, GNorm = 0.2465, lr_0 = 4.4665e-04
Loss = 1.4951e-02, PNorm = 157.1840, GNorm = 0.2426, lr_0 = 4.4635e-04
Loss = 1.0728e-02, PNorm = 157.2119, GNorm = 0.1949, lr_0 = 4.4604e-04
Loss = 1.1801e-02, PNorm = 157.2427, GNorm = 0.2634, lr_0 = 4.4574e-04
Loss = 1.3497e-02, PNorm = 157.2708, GNorm = 0.3851, lr_0 = 4.4543e-04
Loss = 1.3295e-02, PNorm = 157.3013, GNorm = 0.2734, lr_0 = 4.4513e-04
Loss = 1.3231e-02, PNorm = 157.3262, GNorm = 0.3178, lr_0 = 4.4482e-04
Loss = 1.1789e-02, PNorm = 157.3525, GNorm = 0.1187, lr_0 = 4.4452e-04
Loss = 1.1757e-02, PNorm = 157.3825, GNorm = 0.3408, lr_0 = 4.4421e-04
Loss = 1.7034e-02, PNorm = 157.4134, GNorm = 0.7976, lr_0 = 4.4391e-04
Loss = 1.4655e-02, PNorm = 157.4377, GNorm = 0.4455, lr_0 = 4.4360e-04
Loss = 1.3191e-02, PNorm = 157.4625, GNorm = 0.5376, lr_0 = 4.4330e-04
Loss = 1.2317e-02, PNorm = 157.4955, GNorm = 0.3914, lr_0 = 4.4299e-04
Loss = 1.3934e-02, PNorm = 157.5259, GNorm = 0.6869, lr_0 = 4.4269e-04
Loss = 1.4151e-02, PNorm = 157.5573, GNorm = 0.3956, lr_0 = 4.4239e-04
Loss = 9.2134e-03, PNorm = 157.5866, GNorm = 0.1186, lr_0 = 4.4209e-04
Loss = 1.3385e-02, PNorm = 157.6119, GNorm = 0.7578, lr_0 = 4.4178e-04
Loss = 1.1690e-02, PNorm = 157.6401, GNorm = 0.4985, lr_0 = 4.4148e-04
Loss = 1.3274e-02, PNorm = 157.6688, GNorm = 0.3255, lr_0 = 4.4118e-04
Loss = 1.2622e-02, PNorm = 157.7002, GNorm = 0.1851, lr_0 = 4.4088e-04
Loss = 1.3144e-02, PNorm = 157.7304, GNorm = 0.3308, lr_0 = 4.4057e-04
Loss = 1.1510e-02, PNorm = 157.7655, GNorm = 0.2112, lr_0 = 4.4027e-04
Loss = 1.3231e-02, PNorm = 157.7930, GNorm = 0.3011, lr_0 = 4.3997e-04
Loss = 1.0688e-02, PNorm = 157.8200, GNorm = 0.2795, lr_0 = 4.3967e-04
Loss = 1.1702e-02, PNorm = 157.8510, GNorm = 0.3065, lr_0 = 4.3937e-04
Validation mae = 0.280755
Epoch 12
Loss = 1.4340e-02, PNorm = 157.8794, GNorm = 0.2833, lr_0 = 4.3907e-04
Loss = 1.0846e-02, PNorm = 157.9023, GNorm = 0.1781, lr_0 = 4.3877e-04
Loss = 1.1163e-02, PNorm = 157.9228, GNorm = 0.2643, lr_0 = 4.3846e-04
Loss = 9.9244e-03, PNorm = 157.9393, GNorm = 0.3763, lr_0 = 4.3816e-04
Loss = 9.7596e-03, PNorm = 157.9541, GNorm = 0.1945, lr_0 = 4.3786e-04
Loss = 9.5611e-03, PNorm = 157.9720, GNorm = 0.2555, lr_0 = 4.3756e-04
Loss = 9.5841e-03, PNorm = 157.9902, GNorm = 0.2280, lr_0 = 4.3726e-04
Loss = 1.0260e-02, PNorm = 158.0091, GNorm = 0.1475, lr_0 = 4.3696e-04
Loss = 9.9217e-03, PNorm = 158.0278, GNorm = 0.2919, lr_0 = 4.3667e-04
Loss = 9.0564e-03, PNorm = 158.0458, GNorm = 0.1492, lr_0 = 4.3637e-04
Loss = 9.2678e-03, PNorm = 158.0669, GNorm = 0.6059, lr_0 = 4.3607e-04
Loss = 8.5120e-03, PNorm = 158.0858, GNorm = 0.1716, lr_0 = 4.3577e-04
Loss = 1.0680e-02, PNorm = 158.1029, GNorm = 0.2568, lr_0 = 4.3547e-04
Loss = 1.2619e-02, PNorm = 158.1232, GNorm = 0.3626, lr_0 = 4.3517e-04
Loss = 9.0057e-03, PNorm = 158.1414, GNorm = 0.2770, lr_0 = 4.3487e-04
Loss = 1.4838e-02, PNorm = 158.1547, GNorm = 0.4167, lr_0 = 4.3458e-04
Loss = 1.0145e-02, PNorm = 158.1692, GNorm = 0.3160, lr_0 = 4.3428e-04
Loss = 8.1903e-03, PNorm = 158.1863, GNorm = 0.4587, lr_0 = 4.3398e-04
Loss = 1.0070e-02, PNorm = 158.2042, GNorm = 0.2690, lr_0 = 4.3368e-04
Loss = 8.8789e-03, PNorm = 158.2265, GNorm = 0.1438, lr_0 = 4.3339e-04
Loss = 9.9657e-03, PNorm = 158.2451, GNorm = 0.2513, lr_0 = 4.3309e-04
Loss = 1.0106e-02, PNorm = 158.2620, GNorm = 0.3815, lr_0 = 4.3279e-04
Loss = 1.0090e-02, PNorm = 158.2815, GNorm = 0.1603, lr_0 = 4.3250e-04
Loss = 9.5754e-03, PNorm = 158.2952, GNorm = 0.4250, lr_0 = 4.3220e-04
Loss = 1.3315e-02, PNorm = 158.3118, GNorm = 0.1096, lr_0 = 4.3190e-04
Loss = 1.1215e-02, PNorm = 158.3346, GNorm = 0.2971, lr_0 = 4.3161e-04
Loss = 1.3329e-02, PNorm = 158.3569, GNorm = 0.4903, lr_0 = 4.3131e-04
Loss = 8.8756e-03, PNorm = 158.3830, GNorm = 0.1077, lr_0 = 4.3102e-04
Loss = 9.2836e-03, PNorm = 158.4015, GNorm = 0.1552, lr_0 = 4.3072e-04
Loss = 1.2881e-02, PNorm = 158.4251, GNorm = 0.1506, lr_0 = 4.3043e-04
Loss = 9.1939e-03, PNorm = 158.4488, GNorm = 0.5417, lr_0 = 4.3013e-04
Loss = 1.0105e-02, PNorm = 158.4712, GNorm = 0.1077, lr_0 = 4.2984e-04
Loss = 1.0073e-02, PNorm = 158.4913, GNorm = 0.4301, lr_0 = 4.2954e-04
Loss = 8.0523e-03, PNorm = 158.5133, GNorm = 0.2857, lr_0 = 4.2925e-04
Loss = 1.0148e-02, PNorm = 158.5347, GNorm = 0.1874, lr_0 = 4.2895e-04
Loss = 1.1159e-02, PNorm = 158.5528, GNorm = 0.2057, lr_0 = 4.2866e-04
Loss = 9.9323e-03, PNorm = 158.5727, GNorm = 0.2623, lr_0 = 4.2837e-04
Loss = 1.3154e-02, PNorm = 158.5995, GNorm = 0.4166, lr_0 = 4.2807e-04
Loss = 9.7325e-03, PNorm = 158.6188, GNorm = 0.3437, lr_0 = 4.2778e-04
Loss = 9.3351e-03, PNorm = 158.6407, GNorm = 0.2092, lr_0 = 4.2749e-04
Loss = 9.0288e-03, PNorm = 158.6584, GNorm = 0.2272, lr_0 = 4.2719e-04
Loss = 1.0317e-02, PNorm = 158.6791, GNorm = 0.1725, lr_0 = 4.2690e-04
Loss = 8.5294e-03, PNorm = 158.6959, GNorm = 0.3476, lr_0 = 4.2661e-04
Loss = 9.2022e-03, PNorm = 158.7152, GNorm = 0.1833, lr_0 = 4.2632e-04
Loss = 8.9800e-03, PNorm = 158.7318, GNorm = 0.2880, lr_0 = 4.2602e-04
Loss = 1.2374e-02, PNorm = 158.7528, GNorm = 0.2589, lr_0 = 4.2573e-04
Loss = 9.5915e-03, PNorm = 158.7723, GNorm = 0.3940, lr_0 = 4.2544e-04
Loss = 1.2313e-02, PNorm = 158.7914, GNorm = 0.3111, lr_0 = 4.2515e-04
Loss = 8.8827e-03, PNorm = 158.8107, GNorm = 0.1991, lr_0 = 4.2486e-04
Loss = 9.0724e-03, PNorm = 158.8335, GNorm = 0.2327, lr_0 = 4.2457e-04
Loss = 1.1842e-02, PNorm = 158.8511, GNorm = 0.2052, lr_0 = 4.2428e-04
Loss = 9.9450e-03, PNorm = 158.8741, GNorm = 0.1519, lr_0 = 4.2399e-04
Loss = 8.6650e-03, PNorm = 158.8964, GNorm = 0.2548, lr_0 = 4.2370e-04
Loss = 7.8333e-03, PNorm = 158.9147, GNorm = 0.2177, lr_0 = 4.2340e-04
Loss = 1.1606e-02, PNorm = 158.9323, GNorm = 0.3485, lr_0 = 4.2311e-04
Loss = 9.0197e-03, PNorm = 158.9472, GNorm = 0.3934, lr_0 = 4.2283e-04
Loss = 9.0084e-03, PNorm = 158.9681, GNorm = 0.2962, lr_0 = 4.2254e-04
Loss = 8.5300e-03, PNorm = 158.9905, GNorm = 0.2182, lr_0 = 4.2225e-04
Loss = 1.1418e-02, PNorm = 159.0125, GNorm = 0.3856, lr_0 = 4.2196e-04
Loss = 8.9282e-03, PNorm = 159.0271, GNorm = 0.3871, lr_0 = 4.2167e-04
Loss = 1.2088e-02, PNorm = 159.0502, GNorm = 0.4197, lr_0 = 4.2138e-04
Loss = 1.0649e-02, PNorm = 159.0737, GNorm = 0.1180, lr_0 = 4.2109e-04
Loss = 8.5330e-03, PNorm = 159.0948, GNorm = 0.1848, lr_0 = 4.2080e-04
Loss = 1.0033e-02, PNorm = 159.1155, GNorm = 0.1852, lr_0 = 4.2051e-04
Loss = 1.0335e-02, PNorm = 159.1440, GNorm = 0.5785, lr_0 = 4.2023e-04
Loss = 1.1166e-02, PNorm = 159.1681, GNorm = 0.2262, lr_0 = 4.1994e-04
Loss = 1.2765e-02, PNorm = 159.1897, GNorm = 0.0988, lr_0 = 4.1965e-04
Loss = 1.2133e-02, PNorm = 159.2113, GNorm = 0.4084, lr_0 = 4.1936e-04
Loss = 9.8131e-03, PNorm = 159.2343, GNorm = 0.1347, lr_0 = 4.1907e-04
Loss = 1.0599e-02, PNorm = 159.2584, GNorm = 0.3475, lr_0 = 4.1879e-04
Loss = 9.6211e-03, PNorm = 159.2819, GNorm = 0.5684, lr_0 = 4.1850e-04
Loss = 1.0299e-02, PNorm = 159.3040, GNorm = 0.1878, lr_0 = 4.1821e-04
Loss = 1.0970e-02, PNorm = 159.3264, GNorm = 0.1567, lr_0 = 4.1793e-04
Loss = 8.8022e-03, PNorm = 159.3498, GNorm = 0.1701, lr_0 = 4.1764e-04
Loss = 9.8961e-03, PNorm = 159.3729, GNorm = 0.2590, lr_0 = 4.1736e-04
Loss = 9.8570e-03, PNorm = 159.3960, GNorm = 0.2651, lr_0 = 4.1707e-04
Loss = 1.0774e-02, PNorm = 159.4193, GNorm = 0.3915, lr_0 = 4.1678e-04
Loss = 9.5055e-03, PNorm = 159.4455, GNorm = 0.1106, lr_0 = 4.1650e-04
Loss = 1.2382e-02, PNorm = 159.4713, GNorm = 0.2492, lr_0 = 4.1621e-04
Loss = 1.1761e-02, PNorm = 159.4967, GNorm = 0.3967, lr_0 = 4.1593e-04
Loss = 9.9326e-03, PNorm = 159.5216, GNorm = 0.4186, lr_0 = 4.1564e-04
Loss = 1.1666e-02, PNorm = 159.5492, GNorm = 0.6172, lr_0 = 4.1536e-04
Loss = 1.0654e-02, PNorm = 159.5739, GNorm = 0.1421, lr_0 = 4.1507e-04
Loss = 1.1040e-02, PNorm = 159.5977, GNorm = 0.2409, lr_0 = 4.1479e-04
Loss = 1.0302e-02, PNorm = 159.6210, GNorm = 0.2796, lr_0 = 4.1450e-04
Loss = 9.7538e-03, PNorm = 159.6437, GNorm = 0.2324, lr_0 = 4.1422e-04
Loss = 1.0646e-02, PNorm = 159.6699, GNorm = 0.3473, lr_0 = 4.1394e-04
Loss = 1.0205e-02, PNorm = 159.6943, GNorm = 0.4371, lr_0 = 4.1365e-04
Loss = 9.8336e-03, PNorm = 159.7168, GNorm = 0.2943, lr_0 = 4.1337e-04
Loss = 9.9731e-03, PNorm = 159.7383, GNorm = 0.3741, lr_0 = 4.1309e-04
Loss = 1.1254e-02, PNorm = 159.7599, GNorm = 0.5444, lr_0 = 4.1280e-04
Loss = 9.4501e-03, PNorm = 159.7845, GNorm = 0.3973, lr_0 = 4.1252e-04
Loss = 1.1371e-02, PNorm = 159.8043, GNorm = 0.3971, lr_0 = 4.1224e-04
Loss = 8.1292e-03, PNorm = 159.8237, GNorm = 0.3575, lr_0 = 4.1196e-04
Loss = 1.2360e-02, PNorm = 159.8463, GNorm = 0.1223, lr_0 = 4.1167e-04
Loss = 9.7514e-03, PNorm = 159.8711, GNorm = 0.3041, lr_0 = 4.1139e-04
Loss = 1.1966e-02, PNorm = 159.8924, GNorm = 0.1574, lr_0 = 4.1111e-04
Loss = 1.0330e-02, PNorm = 159.9156, GNorm = 0.5306, lr_0 = 4.1083e-04
Loss = 8.7724e-03, PNorm = 159.9393, GNorm = 0.1728, lr_0 = 4.1055e-04
Loss = 1.0283e-02, PNorm = 159.9651, GNorm = 0.2732, lr_0 = 4.1027e-04
Loss = 1.1293e-02, PNorm = 159.9886, GNorm = 0.1565, lr_0 = 4.0998e-04
Loss = 9.7297e-03, PNorm = 160.0111, GNorm = 0.1569, lr_0 = 4.0970e-04
Loss = 8.9719e-03, PNorm = 160.0341, GNorm = 0.3202, lr_0 = 4.0942e-04
Loss = 9.6708e-03, PNorm = 160.0597, GNorm = 0.2510, lr_0 = 4.0914e-04
Loss = 1.2536e-02, PNorm = 160.0866, GNorm = 0.1917, lr_0 = 4.0886e-04
Loss = 1.0223e-02, PNorm = 160.1114, GNorm = 0.3454, lr_0 = 4.0858e-04
Loss = 9.9215e-03, PNorm = 160.1374, GNorm = 0.2276, lr_0 = 4.0830e-04
Loss = 7.9367e-03, PNorm = 160.1578, GNorm = 0.3879, lr_0 = 4.0802e-04
Loss = 9.6591e-03, PNorm = 160.1780, GNorm = 0.2122, lr_0 = 4.0774e-04
Loss = 1.1527e-02, PNorm = 160.2053, GNorm = 0.2215, lr_0 = 4.0746e-04
Loss = 1.1100e-02, PNorm = 160.2332, GNorm = 0.1336, lr_0 = 4.0718e-04
Loss = 9.0881e-03, PNorm = 160.2585, GNorm = 0.1829, lr_0 = 4.0691e-04
Loss = 1.0651e-02, PNorm = 160.2761, GNorm = 0.5673, lr_0 = 4.0663e-04
Loss = 9.8437e-03, PNorm = 160.2987, GNorm = 0.2343, lr_0 = 4.0635e-04
Loss = 1.0999e-02, PNorm = 160.3253, GNorm = 0.2918, lr_0 = 4.0607e-04
Loss = 1.0685e-02, PNorm = 160.3474, GNorm = 0.2364, lr_0 = 4.0579e-04
Loss = 1.2500e-02, PNorm = 160.3754, GNorm = 0.3186, lr_0 = 4.0551e-04
Loss = 9.4266e-03, PNorm = 160.3976, GNorm = 0.4892, lr_0 = 4.0524e-04
Loss = 1.2391e-02, PNorm = 160.4295, GNorm = 0.7062, lr_0 = 4.0496e-04
Loss = 9.2085e-03, PNorm = 160.4581, GNorm = 0.3283, lr_0 = 4.0468e-04
Validation mae = 0.280852
Epoch 13
Loss = 1.2099e-02, PNorm = 160.4775, GNorm = 0.1558, lr_0 = 4.0440e-04
Loss = 9.0739e-03, PNorm = 160.4944, GNorm = 0.1411, lr_0 = 4.0413e-04
Loss = 1.1322e-02, PNorm = 160.5075, GNorm = 0.1293, lr_0 = 4.0385e-04
Loss = 9.3676e-03, PNorm = 160.5210, GNorm = 0.3118, lr_0 = 4.0357e-04
Loss = 7.8417e-03, PNorm = 160.5350, GNorm = 0.3910, lr_0 = 4.0330e-04
Loss = 8.6624e-03, PNorm = 160.5513, GNorm = 0.2220, lr_0 = 4.0302e-04
Loss = 8.3930e-03, PNorm = 160.5649, GNorm = 0.1749, lr_0 = 4.0274e-04
Loss = 1.0104e-02, PNorm = 160.5790, GNorm = 0.3567, lr_0 = 4.0247e-04
Loss = 9.2087e-03, PNorm = 160.5950, GNorm = 0.0873, lr_0 = 4.0219e-04
Loss = 9.4773e-03, PNorm = 160.6144, GNorm = 0.2285, lr_0 = 4.0192e-04
Loss = 9.1555e-03, PNorm = 160.6321, GNorm = 0.3093, lr_0 = 4.0164e-04
Loss = 7.4316e-03, PNorm = 160.6487, GNorm = 0.2940, lr_0 = 4.0137e-04
Loss = 1.0138e-02, PNorm = 160.6592, GNorm = 0.2059, lr_0 = 4.0109e-04
Loss = 1.0597e-02, PNorm = 160.6787, GNorm = 0.4339, lr_0 = 4.0082e-04
Loss = 8.3990e-03, PNorm = 160.6985, GNorm = 0.2846, lr_0 = 4.0054e-04
Loss = 1.0483e-02, PNorm = 160.7173, GNorm = 0.1961, lr_0 = 4.0027e-04
Loss = 8.8686e-03, PNorm = 160.7356, GNorm = 0.5688, lr_0 = 3.9999e-04
Loss = 1.0241e-02, PNorm = 160.7536, GNorm = 0.2694, lr_0 = 3.9972e-04
Loss = 9.4161e-03, PNorm = 160.7693, GNorm = 0.1043, lr_0 = 3.9945e-04
Loss = 7.8866e-03, PNorm = 160.7850, GNorm = 0.0994, lr_0 = 3.9917e-04
Loss = 7.4963e-03, PNorm = 160.8002, GNorm = 0.3850, lr_0 = 3.9890e-04
Loss = 1.0824e-02, PNorm = 160.8122, GNorm = 0.1775, lr_0 = 3.9863e-04
Loss = 9.3802e-03, PNorm = 160.8238, GNorm = 0.2932, lr_0 = 3.9835e-04
Loss = 9.1348e-03, PNorm = 160.8392, GNorm = 0.2848, lr_0 = 3.9808e-04
Loss = 7.3426e-03, PNorm = 160.8579, GNorm = 0.2073, lr_0 = 3.9781e-04
Loss = 9.3903e-03, PNorm = 160.8770, GNorm = 0.2816, lr_0 = 3.9753e-04
Loss = 8.0928e-03, PNorm = 160.8968, GNorm = 0.2356, lr_0 = 3.9726e-04
Loss = 7.9607e-03, PNorm = 160.9173, GNorm = 0.1788, lr_0 = 3.9699e-04
Loss = 8.6380e-03, PNorm = 160.9373, GNorm = 0.2042, lr_0 = 3.9672e-04
Loss = 7.0518e-03, PNorm = 160.9567, GNorm = 0.2508, lr_0 = 3.9645e-04
Loss = 8.6245e-03, PNorm = 160.9710, GNorm = 0.1229, lr_0 = 3.9617e-04
Loss = 1.0121e-02, PNorm = 160.9860, GNorm = 0.1305, lr_0 = 3.9590e-04
Loss = 8.2283e-03, PNorm = 161.0038, GNorm = 0.4111, lr_0 = 3.9563e-04
Loss = 8.2397e-03, PNorm = 161.0214, GNorm = 0.1262, lr_0 = 3.9536e-04
Loss = 9.8396e-03, PNorm = 161.0418, GNorm = 0.1060, lr_0 = 3.9509e-04
Loss = 9.5723e-03, PNorm = 161.0631, GNorm = 0.2595, lr_0 = 3.9482e-04
Loss = 7.2108e-03, PNorm = 161.0828, GNorm = 0.1489, lr_0 = 3.9455e-04
Loss = 7.8499e-03, PNorm = 161.1014, GNorm = 0.1611, lr_0 = 3.9428e-04
Loss = 6.7312e-03, PNorm = 161.1179, GNorm = 0.3144, lr_0 = 3.9401e-04
Loss = 8.5454e-03, PNorm = 161.1341, GNorm = 0.1400, lr_0 = 3.9374e-04
Loss = 6.9760e-03, PNorm = 161.1515, GNorm = 0.1079, lr_0 = 3.9347e-04
Loss = 8.2437e-03, PNorm = 161.1711, GNorm = 0.3151, lr_0 = 3.9320e-04
Loss = 9.6630e-03, PNorm = 161.1876, GNorm = 0.4640, lr_0 = 3.9293e-04
Loss = 7.0406e-03, PNorm = 161.2095, GNorm = 0.2958, lr_0 = 3.9266e-04
Loss = 6.9854e-03, PNorm = 161.2244, GNorm = 0.3995, lr_0 = 3.9239e-04
Loss = 7.5273e-03, PNorm = 161.2421, GNorm = 0.4729, lr_0 = 3.9212e-04
Loss = 1.0545e-02, PNorm = 161.2572, GNorm = 0.3840, lr_0 = 3.9185e-04
Loss = 8.8679e-03, PNorm = 161.2755, GNorm = 0.1226, lr_0 = 3.9159e-04
Loss = 8.7458e-03, PNorm = 161.2913, GNorm = 0.2617, lr_0 = 3.9132e-04
Loss = 9.2847e-03, PNorm = 161.3085, GNorm = 0.3845, lr_0 = 3.9105e-04
Loss = 1.0970e-02, PNorm = 161.3241, GNorm = 0.1829, lr_0 = 3.9078e-04
Loss = 7.1667e-03, PNorm = 161.3438, GNorm = 0.1290, lr_0 = 3.9051e-04
Loss = 7.4669e-03, PNorm = 161.3609, GNorm = 0.3373, lr_0 = 3.9025e-04
Loss = 8.7070e-03, PNorm = 161.3751, GNorm = 0.5724, lr_0 = 3.8998e-04
Loss = 8.5219e-03, PNorm = 161.3881, GNorm = 0.6414, lr_0 = 3.8971e-04
Loss = 1.2846e-02, PNorm = 161.4091, GNorm = 0.8478, lr_0 = 3.8945e-04
Loss = 8.1556e-03, PNorm = 161.4268, GNorm = 0.1412, lr_0 = 3.8918e-04
Loss = 7.7809e-03, PNorm = 161.4476, GNorm = 0.2612, lr_0 = 3.8891e-04
Loss = 1.2265e-02, PNorm = 161.4655, GNorm = 0.1331, lr_0 = 3.8865e-04
Loss = 8.6560e-03, PNorm = 161.4836, GNorm = 0.1699, lr_0 = 3.8838e-04
Loss = 9.9245e-03, PNorm = 161.5033, GNorm = 0.3290, lr_0 = 3.8811e-04
Loss = 9.5584e-03, PNorm = 161.5216, GNorm = 0.3092, lr_0 = 3.8785e-04
Loss = 8.2537e-03, PNorm = 161.5395, GNorm = 0.1364, lr_0 = 3.8758e-04
Loss = 8.1187e-03, PNorm = 161.5574, GNorm = 0.1441, lr_0 = 3.8732e-04
Loss = 8.3381e-03, PNorm = 161.5741, GNorm = 0.2238, lr_0 = 3.8705e-04
Loss = 1.0006e-02, PNorm = 161.5959, GNorm = 0.3119, lr_0 = 3.8679e-04
Loss = 8.1014e-03, PNorm = 161.6154, GNorm = 0.1555, lr_0 = 3.8652e-04
Loss = 8.9147e-03, PNorm = 161.6350, GNorm = 0.1877, lr_0 = 3.8626e-04
Loss = 7.7079e-03, PNorm = 161.6520, GNorm = 0.1243, lr_0 = 3.8599e-04
Loss = 1.0362e-02, PNorm = 161.6693, GNorm = 0.0982, lr_0 = 3.8573e-04
Loss = 8.4098e-03, PNorm = 161.6887, GNorm = 0.3312, lr_0 = 3.8546e-04
Loss = 6.8196e-03, PNorm = 161.7076, GNorm = 0.2492, lr_0 = 3.8520e-04
Loss = 8.9267e-03, PNorm = 161.7271, GNorm = 0.4066, lr_0 = 3.8493e-04
Loss = 6.9040e-03, PNorm = 161.7434, GNorm = 0.2883, lr_0 = 3.8467e-04
Loss = 8.7310e-03, PNorm = 161.7616, GNorm = 0.3974, lr_0 = 3.8441e-04
Loss = 9.2104e-03, PNorm = 161.7796, GNorm = 0.4663, lr_0 = 3.8414e-04
Loss = 8.3341e-03, PNorm = 161.7987, GNorm = 0.1622, lr_0 = 3.8388e-04
Loss = 8.0275e-03, PNorm = 161.8158, GNorm = 0.0893, lr_0 = 3.8362e-04
Loss = 6.7994e-03, PNorm = 161.8296, GNorm = 0.1884, lr_0 = 3.8336e-04
Loss = 7.9149e-03, PNorm = 161.8428, GNorm = 0.2405, lr_0 = 3.8309e-04
Loss = 6.9310e-03, PNorm = 161.8575, GNorm = 0.2217, lr_0 = 3.8283e-04
Loss = 9.8344e-03, PNorm = 161.8725, GNorm = 0.1879, lr_0 = 3.8257e-04
Loss = 1.2559e-02, PNorm = 161.8895, GNorm = 0.3093, lr_0 = 3.8231e-04
Loss = 7.0805e-03, PNorm = 161.9072, GNorm = 0.3532, lr_0 = 3.8204e-04
Loss = 7.6103e-03, PNorm = 161.9248, GNorm = 0.3092, lr_0 = 3.8178e-04
Loss = 8.2137e-03, PNorm = 161.9405, GNorm = 0.0667, lr_0 = 3.8152e-04
Loss = 9.3444e-03, PNorm = 161.9584, GNorm = 0.2231, lr_0 = 3.8126e-04
Loss = 7.6790e-03, PNorm = 161.9765, GNorm = 0.5519, lr_0 = 3.8100e-04
Loss = 8.6891e-03, PNorm = 161.9976, GNorm = 0.2284, lr_0 = 3.8074e-04
Loss = 6.7940e-03, PNorm = 162.0221, GNorm = 0.1671, lr_0 = 3.8048e-04
Loss = 7.4006e-03, PNorm = 162.0457, GNorm = 0.2739, lr_0 = 3.8022e-04
Loss = 7.3696e-03, PNorm = 162.0686, GNorm = 0.1451, lr_0 = 3.7995e-04
Loss = 7.5037e-03, PNorm = 162.0882, GNorm = 0.2608, lr_0 = 3.7969e-04
Loss = 7.4738e-03, PNorm = 162.1102, GNorm = 0.2766, lr_0 = 3.7943e-04
Loss = 7.9711e-03, PNorm = 162.1275, GNorm = 0.2846, lr_0 = 3.7917e-04
Loss = 7.8262e-03, PNorm = 162.1454, GNorm = 0.4093, lr_0 = 3.7891e-04
Loss = 6.6837e-03, PNorm = 162.1629, GNorm = 0.3555, lr_0 = 3.7866e-04
Loss = 7.1757e-03, PNorm = 162.1800, GNorm = 0.3147, lr_0 = 3.7840e-04
Loss = 8.2259e-03, PNorm = 162.1988, GNorm = 0.3775, lr_0 = 3.7814e-04
Loss = 7.6424e-03, PNorm = 162.2204, GNorm = 0.1291, lr_0 = 3.7788e-04
Loss = 8.8144e-03, PNorm = 162.2417, GNorm = 0.6335, lr_0 = 3.7762e-04
Loss = 7.3557e-03, PNorm = 162.2604, GNorm = 0.4337, lr_0 = 3.7736e-04
Loss = 7.8597e-03, PNorm = 162.2799, GNorm = 0.1799, lr_0 = 3.7710e-04
Loss = 8.8160e-03, PNorm = 162.2971, GNorm = 0.2897, lr_0 = 3.7684e-04
Loss = 9.3439e-03, PNorm = 162.3160, GNorm = 0.5048, lr_0 = 3.7659e-04
Loss = 7.4772e-03, PNorm = 162.3370, GNorm = 0.2680, lr_0 = 3.7633e-04
Loss = 8.4507e-03, PNorm = 162.3556, GNorm = 0.0977, lr_0 = 3.7607e-04
Loss = 7.2489e-03, PNorm = 162.3754, GNorm = 0.1997, lr_0 = 3.7581e-04
Loss = 7.1089e-03, PNorm = 162.3926, GNorm = 0.3123, lr_0 = 3.7555e-04
Loss = 7.8839e-03, PNorm = 162.4145, GNorm = 0.3064, lr_0 = 3.7530e-04
Loss = 9.0433e-03, PNorm = 162.4316, GNorm = 0.7202, lr_0 = 3.7504e-04
Loss = 8.7747e-03, PNorm = 162.4549, GNorm = 0.1541, lr_0 = 3.7478e-04
Loss = 8.2446e-03, PNorm = 162.4737, GNorm = 0.3732, lr_0 = 3.7453e-04
Loss = 7.7254e-03, PNorm = 162.4911, GNorm = 0.1222, lr_0 = 3.7427e-04
Loss = 9.3129e-03, PNorm = 162.5083, GNorm = 0.3045, lr_0 = 3.7401e-04
Loss = 1.0166e-02, PNorm = 162.5303, GNorm = 0.2178, lr_0 = 3.7376e-04
Loss = 9.6797e-03, PNorm = 162.5528, GNorm = 0.1069, lr_0 = 3.7350e-04
Loss = 6.4427e-03, PNorm = 162.5700, GNorm = 0.1782, lr_0 = 3.7325e-04
Loss = 1.1139e-02, PNorm = 162.5842, GNorm = 0.1420, lr_0 = 3.7299e-04
Loss = 7.2219e-03, PNorm = 162.6000, GNorm = 0.1991, lr_0 = 3.7273e-04
Validation mae = 0.279230
Epoch 14
Loss = 6.9189e-03, PNorm = 162.6160, GNorm = 0.2368, lr_0 = 3.7248e-04
Loss = 8.3729e-03, PNorm = 162.6302, GNorm = 0.2355, lr_0 = 3.7222e-04
Loss = 8.1763e-03, PNorm = 162.6424, GNorm = 0.3805, lr_0 = 3.7197e-04
Loss = 6.8945e-03, PNorm = 162.6549, GNorm = 0.1766, lr_0 = 3.7171e-04
Loss = 6.6996e-03, PNorm = 162.6710, GNorm = 0.1709, lr_0 = 3.7146e-04
Loss = 6.8173e-03, PNorm = 162.6808, GNorm = 0.2215, lr_0 = 3.7120e-04
Loss = 7.9320e-03, PNorm = 162.6939, GNorm = 0.4101, lr_0 = 3.7095e-04
Loss = 7.2262e-03, PNorm = 162.7046, GNorm = 0.2619, lr_0 = 3.7070e-04
Loss = 8.1232e-03, PNorm = 162.7160, GNorm = 0.3263, lr_0 = 3.7044e-04
Loss = 5.7640e-03, PNorm = 162.7287, GNorm = 0.3169, lr_0 = 3.7019e-04
Loss = 6.6290e-03, PNorm = 162.7434, GNorm = 0.5772, lr_0 = 3.6993e-04
Loss = 6.5291e-03, PNorm = 162.7584, GNorm = 0.4418, lr_0 = 3.6968e-04
Loss = 6.6785e-03, PNorm = 162.7755, GNorm = 0.2310, lr_0 = 3.6943e-04
Loss = 6.7265e-03, PNorm = 162.7897, GNorm = 0.4219, lr_0 = 3.6917e-04
Loss = 6.2439e-03, PNorm = 162.7998, GNorm = 0.1029, lr_0 = 3.6892e-04
Loss = 5.8799e-03, PNorm = 162.8128, GNorm = 0.4887, lr_0 = 3.6867e-04
Loss = 7.3797e-03, PNorm = 162.8267, GNorm = 0.2762, lr_0 = 3.6842e-04
Loss = 7.1931e-03, PNorm = 162.8433, GNorm = 0.1376, lr_0 = 3.6816e-04
Loss = 6.0182e-03, PNorm = 162.8597, GNorm = 0.2659, lr_0 = 3.6791e-04
Loss = 5.4924e-03, PNorm = 162.8704, GNorm = 0.2561, lr_0 = 3.6766e-04
Loss = 6.6723e-03, PNorm = 162.8844, GNorm = 0.2122, lr_0 = 3.6741e-04
Loss = 7.0837e-03, PNorm = 162.9010, GNorm = 0.1464, lr_0 = 3.6716e-04
Loss = 5.7687e-03, PNorm = 162.9181, GNorm = 0.2036, lr_0 = 3.6690e-04
Loss = 8.5859e-03, PNorm = 162.9340, GNorm = 0.2515, lr_0 = 3.6665e-04
Loss = 9.0984e-03, PNorm = 162.9425, GNorm = 0.2070, lr_0 = 3.6640e-04
Loss = 7.1621e-03, PNorm = 162.9559, GNorm = 0.1835, lr_0 = 3.6615e-04
Loss = 5.7225e-03, PNorm = 162.9712, GNorm = 0.1910, lr_0 = 3.6590e-04
Loss = 6.7319e-03, PNorm = 162.9868, GNorm = 0.2542, lr_0 = 3.6565e-04
Loss = 9.1589e-03, PNorm = 163.0041, GNorm = 0.3292, lr_0 = 3.6540e-04
Loss = 5.6343e-03, PNorm = 163.0192, GNorm = 0.2126, lr_0 = 3.6515e-04
Loss = 6.3658e-03, PNorm = 163.0340, GNorm = 0.0998, lr_0 = 3.6490e-04
Loss = 6.8867e-03, PNorm = 163.0486, GNorm = 0.3227, lr_0 = 3.6465e-04
Loss = 6.9060e-03, PNorm = 163.0612, GNorm = 0.1679, lr_0 = 3.6440e-04
Loss = 6.6125e-03, PNorm = 163.0747, GNorm = 0.2314, lr_0 = 3.6415e-04
Loss = 6.0815e-03, PNorm = 163.0876, GNorm = 0.3522, lr_0 = 3.6390e-04
Loss = 6.8856e-03, PNorm = 163.1003, GNorm = 0.3536, lr_0 = 3.6365e-04
Loss = 7.5815e-03, PNorm = 163.1127, GNorm = 0.2848, lr_0 = 3.6340e-04
Loss = 8.5831e-03, PNorm = 163.1277, GNorm = 0.1324, lr_0 = 3.6315e-04
Loss = 8.0582e-03, PNorm = 163.1408, GNorm = 0.2701, lr_0 = 3.6290e-04
Loss = 6.4870e-03, PNorm = 163.1529, GNorm = 0.4210, lr_0 = 3.6266e-04
Loss = 9.7875e-03, PNorm = 163.1672, GNorm = 0.1869, lr_0 = 3.6241e-04
Loss = 7.3719e-03, PNorm = 163.1842, GNorm = 0.1699, lr_0 = 3.6216e-04
Loss = 8.1024e-03, PNorm = 163.1997, GNorm = 0.3494, lr_0 = 3.6191e-04
Loss = 8.0870e-03, PNorm = 163.2130, GNorm = 0.0902, lr_0 = 3.6166e-04
Loss = 6.5642e-03, PNorm = 163.2248, GNorm = 0.3278, lr_0 = 3.6141e-04
Loss = 6.7330e-03, PNorm = 163.2346, GNorm = 0.3458, lr_0 = 3.6117e-04
Loss = 7.0898e-03, PNorm = 163.2509, GNorm = 0.1915, lr_0 = 3.6092e-04
Loss = 7.4352e-03, PNorm = 163.2670, GNorm = 0.3470, lr_0 = 3.6067e-04
Loss = 6.9942e-03, PNorm = 163.2818, GNorm = 0.4751, lr_0 = 3.6043e-04
Loss = 6.5336e-03, PNorm = 163.2979, GNorm = 0.2780, lr_0 = 3.6018e-04
Loss = 7.0936e-03, PNorm = 163.3139, GNorm = 0.3402, lr_0 = 3.5993e-04
Loss = 7.5516e-03, PNorm = 163.3283, GNorm = 0.2940, lr_0 = 3.5969e-04
Loss = 6.3602e-03, PNorm = 163.3412, GNorm = 0.0686, lr_0 = 3.5944e-04
Loss = 5.9963e-03, PNorm = 163.3555, GNorm = 0.2959, lr_0 = 3.5919e-04
Loss = 7.7735e-03, PNorm = 163.3683, GNorm = 0.2241, lr_0 = 3.5895e-04
Loss = 8.0976e-03, PNorm = 163.3810, GNorm = 0.1538, lr_0 = 3.5870e-04
Loss = 7.6192e-03, PNorm = 163.3947, GNorm = 0.3603, lr_0 = 3.5845e-04
Loss = 7.5668e-03, PNorm = 163.4083, GNorm = 0.6832, lr_0 = 3.5821e-04
Loss = 5.4332e-03, PNorm = 163.4236, GNorm = 0.1435, lr_0 = 3.5796e-04
Loss = 7.4445e-03, PNorm = 163.4366, GNorm = 0.2189, lr_0 = 3.5772e-04
Loss = 7.2642e-03, PNorm = 163.4537, GNorm = 0.2660, lr_0 = 3.5747e-04
Loss = 7.0081e-03, PNorm = 163.4691, GNorm = 0.3237, lr_0 = 3.5723e-04
Loss = 6.2245e-03, PNorm = 163.4865, GNorm = 0.0984, lr_0 = 3.5698e-04
Loss = 8.6440e-03, PNorm = 163.5041, GNorm = 0.1760, lr_0 = 3.5674e-04
Loss = 7.2856e-03, PNorm = 163.5188, GNorm = 0.3597, lr_0 = 3.5650e-04
Loss = 7.2980e-03, PNorm = 163.5352, GNorm = 0.3881, lr_0 = 3.5625e-04
Loss = 6.2224e-03, PNorm = 163.5508, GNorm = 0.3898, lr_0 = 3.5601e-04
Loss = 6.6235e-03, PNorm = 163.5658, GNorm = 0.2153, lr_0 = 3.5576e-04
Loss = 6.5405e-03, PNorm = 163.5792, GNorm = 0.1768, lr_0 = 3.5552e-04
Loss = 9.7804e-03, PNorm = 163.5892, GNorm = 0.2356, lr_0 = 3.5528e-04
Loss = 6.7868e-03, PNorm = 163.6028, GNorm = 0.2243, lr_0 = 3.5503e-04
Loss = 6.9212e-03, PNorm = 163.6220, GNorm = 0.1405, lr_0 = 3.5479e-04
Loss = 6.6149e-03, PNorm = 163.6423, GNorm = 0.2125, lr_0 = 3.5455e-04
Loss = 1.1214e-02, PNorm = 163.6588, GNorm = 0.2371, lr_0 = 3.5430e-04
Loss = 8.2095e-03, PNorm = 163.6781, GNorm = 0.4099, lr_0 = 3.5406e-04
Loss = 6.1053e-03, PNorm = 163.6953, GNorm = 0.1785, lr_0 = 3.5382e-04
Loss = 6.0356e-03, PNorm = 163.7084, GNorm = 0.1241, lr_0 = 3.5358e-04
Loss = 7.0030e-03, PNorm = 163.7225, GNorm = 0.1665, lr_0 = 3.5333e-04
Loss = 8.2488e-03, PNorm = 163.7368, GNorm = 0.1360, lr_0 = 3.5309e-04
Loss = 6.8253e-03, PNorm = 163.7488, GNorm = 0.3467, lr_0 = 3.5285e-04
Loss = 7.8049e-03, PNorm = 163.7641, GNorm = 0.3069, lr_0 = 3.5261e-04
Loss = 5.7139e-03, PNorm = 163.7834, GNorm = 0.3121, lr_0 = 3.5237e-04
Loss = 6.8544e-03, PNorm = 163.8011, GNorm = 0.2852, lr_0 = 3.5212e-04
Loss = 6.9054e-03, PNorm = 163.8182, GNorm = 0.3243, lr_0 = 3.5188e-04
Loss = 7.7943e-03, PNorm = 163.8357, GNorm = 0.3751, lr_0 = 3.5164e-04
Loss = 6.1653e-03, PNorm = 163.8536, GNorm = 0.2597, lr_0 = 3.5140e-04
Loss = 7.6245e-03, PNorm = 163.8681, GNorm = 0.3995, lr_0 = 3.5116e-04
Loss = 6.9823e-03, PNorm = 163.8816, GNorm = 0.2043, lr_0 = 3.5092e-04
Loss = 8.8324e-03, PNorm = 163.8974, GNorm = 0.1049, lr_0 = 3.5068e-04
Loss = 1.0439e-02, PNorm = 163.9116, GNorm = 0.6703, lr_0 = 3.5044e-04
Loss = 7.1749e-03, PNorm = 163.9288, GNorm = 0.1193, lr_0 = 3.5020e-04
Loss = 8.7919e-03, PNorm = 163.9449, GNorm = 0.2247, lr_0 = 3.4996e-04
Loss = 6.5225e-03, PNorm = 163.9627, GNorm = 0.1533, lr_0 = 3.4972e-04
Loss = 6.5014e-03, PNorm = 163.9761, GNorm = 0.1364, lr_0 = 3.4948e-04
Loss = 7.5856e-03, PNorm = 163.9912, GNorm = 0.1408, lr_0 = 3.4924e-04
Loss = 6.6892e-03, PNorm = 164.0091, GNorm = 0.4923, lr_0 = 3.4900e-04
Loss = 7.1937e-03, PNorm = 164.0245, GNorm = 0.4255, lr_0 = 3.4876e-04
Loss = 7.5813e-03, PNorm = 164.0401, GNorm = 0.2558, lr_0 = 3.4852e-04
Loss = 1.0209e-02, PNorm = 164.0562, GNorm = 0.0940, lr_0 = 3.4828e-04
Loss = 7.3869e-03, PNorm = 164.0740, GNorm = 0.1535, lr_0 = 3.4805e-04
Loss = 6.3036e-03, PNorm = 164.0913, GNorm = 0.2494, lr_0 = 3.4781e-04
Loss = 5.1718e-03, PNorm = 164.1081, GNorm = 0.2499, lr_0 = 3.4757e-04
Loss = 6.1965e-03, PNorm = 164.1214, GNorm = 0.2262, lr_0 = 3.4733e-04
Loss = 9.4675e-03, PNorm = 164.1336, GNorm = 0.4088, lr_0 = 3.4709e-04
Loss = 8.9115e-03, PNorm = 164.1437, GNorm = 0.0966, lr_0 = 3.4686e-04
Loss = 6.1412e-03, PNorm = 164.1575, GNorm = 0.0766, lr_0 = 3.4662e-04
Loss = 7.5133e-03, PNorm = 164.1738, GNorm = 0.1899, lr_0 = 3.4638e-04
Loss = 5.4868e-03, PNorm = 164.1903, GNorm = 0.1365, lr_0 = 3.4614e-04
Loss = 7.5711e-03, PNorm = 164.2091, GNorm = 0.3616, lr_0 = 3.4591e-04
Loss = 7.6860e-03, PNorm = 164.2270, GNorm = 0.3080, lr_0 = 3.4567e-04
Loss = 6.3737e-03, PNorm = 164.2413, GNorm = 0.0780, lr_0 = 3.4543e-04
Loss = 6.4406e-03, PNorm = 164.2563, GNorm = 0.1865, lr_0 = 3.4520e-04
Loss = 7.7788e-03, PNorm = 164.2713, GNorm = 0.1102, lr_0 = 3.4496e-04
Loss = 6.9094e-03, PNorm = 164.2883, GNorm = 0.2422, lr_0 = 3.4472e-04
Loss = 7.7001e-03, PNorm = 164.3034, GNorm = 0.6937, lr_0 = 3.4449e-04
Loss = 8.0335e-03, PNorm = 164.3191, GNorm = 0.3439, lr_0 = 3.4425e-04
Loss = 6.5461e-03, PNorm = 164.3434, GNorm = 0.0960, lr_0 = 3.4402e-04
Loss = 9.3983e-03, PNorm = 164.3614, GNorm = 0.1344, lr_0 = 3.4378e-04
Loss = 7.4525e-03, PNorm = 164.3751, GNorm = 0.1317, lr_0 = 3.4354e-04
Loss = 7.5001e-03, PNorm = 164.3907, GNorm = 0.2301, lr_0 = 3.4331e-04
Validation mae = 0.279312
Epoch 15
Loss = 8.0088e-03, PNorm = 164.4035, GNorm = 0.2463, lr_0 = 3.4307e-04
Loss = 6.4795e-03, PNorm = 164.4167, GNorm = 0.1186, lr_0 = 3.4284e-04
Loss = 6.0734e-03, PNorm = 164.4302, GNorm = 0.1464, lr_0 = 3.4260e-04
Loss = 7.1994e-03, PNorm = 164.4444, GNorm = 0.2698, lr_0 = 3.4237e-04
Loss = 5.8085e-03, PNorm = 164.4561, GNorm = 0.0929, lr_0 = 3.4213e-04
Loss = 6.2527e-03, PNorm = 164.4678, GNorm = 0.1618, lr_0 = 3.4190e-04
Loss = 5.4010e-03, PNorm = 164.4784, GNorm = 0.1206, lr_0 = 3.4167e-04
Loss = 5.9520e-03, PNorm = 164.4892, GNorm = 0.1255, lr_0 = 3.4143e-04
Loss = 7.1715e-03, PNorm = 164.4999, GNorm = 0.2476, lr_0 = 3.4120e-04
Loss = 6.0958e-03, PNorm = 164.5102, GNorm = 0.2726, lr_0 = 3.4096e-04
Loss = 7.5909e-03, PNorm = 164.5228, GNorm = 0.1059, lr_0 = 3.4073e-04
Loss = 6.1236e-03, PNorm = 164.5364, GNorm = 0.2819, lr_0 = 3.4050e-04
Loss = 6.9422e-03, PNorm = 164.5512, GNorm = 0.2218, lr_0 = 3.4026e-04
Loss = 6.0016e-03, PNorm = 164.5657, GNorm = 0.3235, lr_0 = 3.4003e-04
Loss = 6.2314e-03, PNorm = 164.5781, GNorm = 0.3819, lr_0 = 3.3980e-04
Loss = 5.7173e-03, PNorm = 164.5867, GNorm = 0.2838, lr_0 = 3.3956e-04
Loss = 6.1563e-03, PNorm = 164.5978, GNorm = 0.1974, lr_0 = 3.3933e-04
Loss = 6.0924e-03, PNorm = 164.6141, GNorm = 0.3086, lr_0 = 3.3910e-04
Loss = 5.5978e-03, PNorm = 164.6318, GNorm = 0.0957, lr_0 = 3.3887e-04
Loss = 5.6572e-03, PNorm = 164.6464, GNorm = 0.3473, lr_0 = 3.3864e-04
Loss = 5.3300e-03, PNorm = 164.6574, GNorm = 0.1594, lr_0 = 3.3840e-04
Loss = 6.2965e-03, PNorm = 164.6667, GNorm = 0.2154, lr_0 = 3.3817e-04
Loss = 5.8189e-03, PNorm = 164.6720, GNorm = 0.2513, lr_0 = 3.3794e-04
Loss = 5.3870e-03, PNorm = 164.6865, GNorm = 0.2235, lr_0 = 3.3771e-04
Loss = 6.6776e-03, PNorm = 164.6995, GNorm = 0.1774, lr_0 = 3.3748e-04
Loss = 7.4937e-03, PNorm = 164.7145, GNorm = 0.2182, lr_0 = 3.3725e-04
Loss = 7.0948e-03, PNorm = 164.7269, GNorm = 0.1401, lr_0 = 3.3701e-04
Loss = 6.1191e-03, PNorm = 164.7391, GNorm = 0.0880, lr_0 = 3.3678e-04
Loss = 5.2884e-03, PNorm = 164.7514, GNorm = 0.0871, lr_0 = 3.3655e-04
Loss = 7.4464e-03, PNorm = 164.7639, GNorm = 0.1625, lr_0 = 3.3632e-04
Loss = 6.1317e-03, PNorm = 164.7739, GNorm = 0.2002, lr_0 = 3.3609e-04
Loss = 6.0483e-03, PNorm = 164.7823, GNorm = 0.3073, lr_0 = 3.3586e-04
Loss = 1.1005e-02, PNorm = 164.7931, GNorm = 0.1707, lr_0 = 3.3563e-04
Loss = 6.1302e-03, PNorm = 164.8067, GNorm = 0.2381, lr_0 = 3.3540e-04
Loss = 7.1845e-03, PNorm = 164.8213, GNorm = 0.1706, lr_0 = 3.3517e-04
Loss = 5.4647e-03, PNorm = 164.8347, GNorm = 0.1642, lr_0 = 3.3494e-04
Loss = 6.3702e-03, PNorm = 164.8476, GNorm = 0.1972, lr_0 = 3.3471e-04
Loss = 5.4670e-03, PNorm = 164.8604, GNorm = 0.1391, lr_0 = 3.3448e-04
Loss = 7.5758e-03, PNorm = 164.8737, GNorm = 0.5314, lr_0 = 3.3425e-04
Loss = 7.4903e-03, PNorm = 164.8888, GNorm = 0.3316, lr_0 = 3.3403e-04
Loss = 6.1336e-03, PNorm = 164.9001, GNorm = 0.1454, lr_0 = 3.3380e-04
Loss = 5.4397e-03, PNorm = 164.9130, GNorm = 0.2778, lr_0 = 3.3357e-04
Loss = 7.2249e-03, PNorm = 164.9294, GNorm = 0.1144, lr_0 = 3.3334e-04
Loss = 9.7489e-03, PNorm = 164.9411, GNorm = 0.1840, lr_0 = 3.3311e-04
Loss = 6.1514e-03, PNorm = 164.9497, GNorm = 0.3956, lr_0 = 3.3288e-04
Loss = 5.5533e-03, PNorm = 164.9600, GNorm = 0.2896, lr_0 = 3.3265e-04
Loss = 7.3549e-03, PNorm = 164.9725, GNorm = 0.2273, lr_0 = 3.3243e-04
Loss = 5.7471e-03, PNorm = 164.9854, GNorm = 0.2835, lr_0 = 3.3220e-04
Loss = 5.7348e-03, PNorm = 164.9963, GNorm = 0.1743, lr_0 = 3.3197e-04
Loss = 6.3215e-03, PNorm = 165.0086, GNorm = 0.2098, lr_0 = 3.3174e-04
Loss = 7.5675e-03, PNorm = 165.0216, GNorm = 0.2304, lr_0 = 3.3152e-04
Loss = 5.2630e-03, PNorm = 165.0388, GNorm = 0.6366, lr_0 = 3.3129e-04
Loss = 6.3348e-03, PNorm = 165.0532, GNorm = 0.4699, lr_0 = 3.3106e-04
Loss = 6.1116e-03, PNorm = 165.0671, GNorm = 0.1751, lr_0 = 3.3084e-04
Loss = 6.1089e-03, PNorm = 165.0791, GNorm = 0.1862, lr_0 = 3.3061e-04
Loss = 4.9167e-03, PNorm = 165.0901, GNorm = 0.1053, lr_0 = 3.3038e-04
Loss = 7.6368e-03, PNorm = 165.1031, GNorm = 0.1510, lr_0 = 3.3016e-04
Loss = 6.3037e-03, PNorm = 165.1158, GNorm = 0.2696, lr_0 = 3.2993e-04
Loss = 5.9049e-03, PNorm = 165.1270, GNorm = 0.1568, lr_0 = 3.2970e-04
Loss = 5.3189e-03, PNorm = 165.1408, GNorm = 0.2404, lr_0 = 3.2948e-04
Loss = 5.8020e-03, PNorm = 165.1532, GNorm = 0.1574, lr_0 = 3.2925e-04
Loss = 5.1226e-03, PNorm = 165.1661, GNorm = 0.0796, lr_0 = 3.2903e-04
Loss = 4.9660e-03, PNorm = 165.1750, GNorm = 0.1802, lr_0 = 3.2880e-04
Loss = 7.0609e-03, PNorm = 165.1862, GNorm = 0.2179, lr_0 = 3.2858e-04
Loss = 5.8971e-03, PNorm = 165.2001, GNorm = 0.1878, lr_0 = 3.2835e-04
Loss = 5.1974e-03, PNorm = 165.2127, GNorm = 0.1392, lr_0 = 3.2813e-04
Loss = 6.2130e-03, PNorm = 165.2252, GNorm = 0.3482, lr_0 = 3.2790e-04
Loss = 7.1785e-03, PNorm = 165.2389, GNorm = 0.1445, lr_0 = 3.2768e-04
Loss = 6.2607e-03, PNorm = 165.2545, GNorm = 0.2486, lr_0 = 3.2745e-04
Loss = 5.7063e-03, PNorm = 165.2667, GNorm = 0.2188, lr_0 = 3.2723e-04
Loss = 5.8103e-03, PNorm = 165.2769, GNorm = 0.4003, lr_0 = 3.2700e-04
Loss = 5.6954e-03, PNorm = 165.2902, GNorm = 0.1026, lr_0 = 3.2678e-04
Loss = 5.3727e-03, PNorm = 165.3026, GNorm = 0.1172, lr_0 = 3.2656e-04
Loss = 4.8798e-03, PNorm = 165.3142, GNorm = 0.0917, lr_0 = 3.2633e-04
Loss = 6.1073e-03, PNorm = 165.3256, GNorm = 0.1791, lr_0 = 3.2611e-04
Loss = 5.2699e-03, PNorm = 165.3383, GNorm = 0.3899, lr_0 = 3.2589e-04
Loss = 4.6171e-03, PNorm = 165.3509, GNorm = 0.1450, lr_0 = 3.2566e-04
Loss = 6.4217e-03, PNorm = 165.3648, GNorm = 0.1892, lr_0 = 3.2544e-04
Loss = 6.3682e-03, PNorm = 165.3794, GNorm = 0.3779, lr_0 = 3.2522e-04
Loss = 5.6932e-03, PNorm = 165.3938, GNorm = 0.2236, lr_0 = 3.2499e-04
Loss = 6.2692e-03, PNorm = 165.4104, GNorm = 0.3731, lr_0 = 3.2477e-04
Loss = 7.0544e-03, PNorm = 165.4271, GNorm = 0.3014, lr_0 = 3.2455e-04
Loss = 5.7860e-03, PNorm = 165.4425, GNorm = 0.3459, lr_0 = 3.2433e-04
Loss = 7.0397e-03, PNorm = 165.4582, GNorm = 0.4211, lr_0 = 3.2410e-04
Loss = 5.7037e-03, PNorm = 165.4737, GNorm = 0.2738, lr_0 = 3.2388e-04
Loss = 6.2903e-03, PNorm = 165.4862, GNorm = 0.3217, lr_0 = 3.2366e-04
Loss = 6.1539e-03, PNorm = 165.5013, GNorm = 0.0918, lr_0 = 3.2344e-04
Loss = 5.1562e-03, PNorm = 165.5123, GNorm = 0.3536, lr_0 = 3.2322e-04
Loss = 6.0715e-03, PNorm = 165.5264, GNorm = 0.1735, lr_0 = 3.2300e-04
Loss = 5.3085e-03, PNorm = 165.5422, GNorm = 0.2286, lr_0 = 3.2277e-04
Loss = 9.4561e-03, PNorm = 165.5525, GNorm = 0.1170, lr_0 = 3.2255e-04
Loss = 6.8278e-03, PNorm = 165.5604, GNorm = 0.1637, lr_0 = 3.2233e-04
Loss = 5.8785e-03, PNorm = 165.5725, GNorm = 0.2808, lr_0 = 3.2211e-04
Loss = 5.5320e-03, PNorm = 165.5861, GNorm = 0.0671, lr_0 = 3.2189e-04
Loss = 7.4258e-03, PNorm = 165.6006, GNorm = 0.3603, lr_0 = 3.2167e-04
Loss = 7.2420e-03, PNorm = 165.6163, GNorm = 0.2661, lr_0 = 3.2145e-04
Loss = 5.6297e-03, PNorm = 165.6299, GNorm = 0.1472, lr_0 = 3.2123e-04
Loss = 6.8509e-03, PNorm = 165.6422, GNorm = 0.1191, lr_0 = 3.2101e-04
Loss = 6.5869e-03, PNorm = 165.6550, GNorm = 0.2564, lr_0 = 3.2079e-04
Loss = 7.2080e-03, PNorm = 165.6672, GNorm = 0.5120, lr_0 = 3.2057e-04
Loss = 5.5970e-03, PNorm = 165.6827, GNorm = 0.0966, lr_0 = 3.2035e-04
Loss = 7.7867e-03, PNorm = 165.6958, GNorm = 0.1791, lr_0 = 3.2013e-04
Loss = 4.8057e-03, PNorm = 165.7094, GNorm = 0.2494, lr_0 = 3.1991e-04
Loss = 5.7808e-03, PNorm = 165.7212, GNorm = 0.5027, lr_0 = 3.1969e-04
Loss = 5.9459e-03, PNorm = 165.7354, GNorm = 0.1534, lr_0 = 3.1947e-04
Loss = 5.7596e-03, PNorm = 165.7480, GNorm = 0.1317, lr_0 = 3.1925e-04
Loss = 6.6008e-03, PNorm = 165.7604, GNorm = 0.1039, lr_0 = 3.1904e-04
Loss = 5.0413e-03, PNorm = 165.7715, GNorm = 0.0789, lr_0 = 3.1882e-04
Loss = 6.7682e-03, PNorm = 165.7822, GNorm = 0.2047, lr_0 = 3.1860e-04
Loss = 9.0700e-03, PNorm = 165.7967, GNorm = 0.3613, lr_0 = 3.1838e-04
Loss = 4.3336e-03, PNorm = 165.8138, GNorm = 0.1716, lr_0 = 3.1816e-04
Loss = 5.6799e-03, PNorm = 165.8273, GNorm = 0.2776, lr_0 = 3.1794e-04
Loss = 4.9537e-03, PNorm = 165.8422, GNorm = 0.4402, lr_0 = 3.1773e-04
Loss = 8.2664e-03, PNorm = 165.8555, GNorm = 0.1341, lr_0 = 3.1751e-04
Loss = 5.7688e-03, PNorm = 165.8679, GNorm = 0.3782, lr_0 = 3.1729e-04
Loss = 5.8045e-03, PNorm = 165.8828, GNorm = 0.1038, lr_0 = 3.1707e-04
Loss = 5.6803e-03, PNorm = 165.8933, GNorm = 0.3332, lr_0 = 3.1686e-04
Loss = 6.5216e-03, PNorm = 165.9072, GNorm = 0.2183, lr_0 = 3.1664e-04
Loss = 5.6686e-03, PNorm = 165.9209, GNorm = 0.1982, lr_0 = 3.1642e-04
Loss = 6.2629e-03, PNorm = 165.9343, GNorm = 0.1255, lr_0 = 3.1621e-04
Validation mae = 0.278602
Epoch 16
Loss = 4.8890e-03, PNorm = 165.9474, GNorm = 0.2164, lr_0 = 3.1599e-04
Loss = 5.8031e-03, PNorm = 165.9584, GNorm = 0.2216, lr_0 = 3.1577e-04
Loss = 6.1454e-03, PNorm = 165.9694, GNorm = 0.1967, lr_0 = 3.1556e-04
Loss = 5.6727e-03, PNorm = 165.9814, GNorm = 0.1136, lr_0 = 3.1534e-04
Loss = 4.9383e-03, PNorm = 165.9925, GNorm = 0.2065, lr_0 = 3.1512e-04
Loss = 4.7856e-03, PNorm = 166.0020, GNorm = 0.1704, lr_0 = 3.1491e-04
Loss = 5.7430e-03, PNorm = 166.0114, GNorm = 0.2907, lr_0 = 3.1469e-04
Loss = 5.9286e-03, PNorm = 166.0223, GNorm = 0.2515, lr_0 = 3.1448e-04
Loss = 6.6907e-03, PNorm = 166.0307, GNorm = 0.1320, lr_0 = 3.1426e-04
Loss = 4.4339e-03, PNorm = 166.0397, GNorm = 0.1152, lr_0 = 3.1405e-04
Loss = 9.2521e-03, PNorm = 166.0491, GNorm = 0.2370, lr_0 = 3.1383e-04
Loss = 5.2468e-03, PNorm = 166.0598, GNorm = 0.2121, lr_0 = 3.1362e-04
Loss = 5.8344e-03, PNorm = 166.0731, GNorm = 0.2275, lr_0 = 3.1340e-04
Loss = 5.8491e-03, PNorm = 166.0851, GNorm = 0.3305, lr_0 = 3.1319e-04
Loss = 5.2201e-03, PNorm = 166.0980, GNorm = 0.1024, lr_0 = 3.1297e-04
Loss = 6.7121e-03, PNorm = 166.1076, GNorm = 0.2074, lr_0 = 3.1276e-04
Loss = 5.4119e-03, PNorm = 166.1202, GNorm = 0.1843, lr_0 = 3.1254e-04
Loss = 6.5610e-03, PNorm = 166.1340, GNorm = 0.2357, lr_0 = 3.1233e-04
Loss = 5.0185e-03, PNorm = 166.1461, GNorm = 0.2065, lr_0 = 3.1212e-04
Loss = 5.4226e-03, PNorm = 166.1557, GNorm = 0.1212, lr_0 = 3.1190e-04
Loss = 5.3471e-03, PNorm = 166.1680, GNorm = 0.2009, lr_0 = 3.1169e-04
Loss = 5.7599e-03, PNorm = 166.1802, GNorm = 0.2048, lr_0 = 3.1147e-04
Loss = 5.5484e-03, PNorm = 166.1892, GNorm = 0.3548, lr_0 = 3.1126e-04
Loss = 5.6137e-03, PNorm = 166.2009, GNorm = 0.1352, lr_0 = 3.1105e-04
Loss = 4.9420e-03, PNorm = 166.2113, GNorm = 0.1195, lr_0 = 3.1083e-04
Loss = 5.8753e-03, PNorm = 166.2217, GNorm = 0.1867, lr_0 = 3.1062e-04
Loss = 4.1440e-03, PNorm = 166.2346, GNorm = 0.1635, lr_0 = 3.1041e-04
Loss = 4.5149e-03, PNorm = 166.2463, GNorm = 0.2679, lr_0 = 3.1020e-04
Loss = 4.8103e-03, PNorm = 166.2574, GNorm = 0.4122, lr_0 = 3.0998e-04
Loss = 5.9201e-03, PNorm = 166.2690, GNorm = 0.0968, lr_0 = 3.0977e-04
Loss = 3.8644e-03, PNorm = 166.2781, GNorm = 0.2038, lr_0 = 3.0956e-04
Loss = 4.7163e-03, PNorm = 166.2859, GNorm = 0.5171, lr_0 = 3.0935e-04
Loss = 5.8538e-03, PNorm = 166.2934, GNorm = 0.2712, lr_0 = 3.0914e-04
Loss = 7.4776e-03, PNorm = 166.3026, GNorm = 0.1518, lr_0 = 3.0892e-04
Loss = 4.6210e-03, PNorm = 166.3141, GNorm = 0.2654, lr_0 = 3.0871e-04
Loss = 4.8026e-03, PNorm = 166.3261, GNorm = 0.1202, lr_0 = 3.0850e-04
Loss = 4.5525e-03, PNorm = 166.3358, GNorm = 0.1260, lr_0 = 3.0829e-04
Loss = 5.2651e-03, PNorm = 166.3488, GNorm = 0.2455, lr_0 = 3.0808e-04
Loss = 5.5269e-03, PNorm = 166.3627, GNorm = 0.0935, lr_0 = 3.0787e-04
Loss = 4.2428e-03, PNorm = 166.3738, GNorm = 0.1553, lr_0 = 3.0766e-04
Loss = 5.0927e-03, PNorm = 166.3807, GNorm = 0.1343, lr_0 = 3.0745e-04
Loss = 6.0204e-03, PNorm = 166.3912, GNorm = 0.2262, lr_0 = 3.0723e-04
Loss = 4.7248e-03, PNorm = 166.4031, GNorm = 0.1493, lr_0 = 3.0702e-04
Loss = 6.6100e-03, PNorm = 166.4159, GNorm = 0.1651, lr_0 = 3.0681e-04
Loss = 5.1362e-03, PNorm = 166.4267, GNorm = 0.2518, lr_0 = 3.0660e-04
Loss = 6.0028e-03, PNorm = 166.4371, GNorm = 0.1251, lr_0 = 3.0639e-04
Loss = 5.6634e-03, PNorm = 166.4501, GNorm = 0.0552, lr_0 = 3.0618e-04
Loss = 6.2957e-03, PNorm = 166.4629, GNorm = 0.1362, lr_0 = 3.0597e-04
Loss = 4.6380e-03, PNorm = 166.4703, GNorm = 0.1162, lr_0 = 3.0576e-04
Loss = 5.5557e-03, PNorm = 166.4798, GNorm = 0.2195, lr_0 = 3.0555e-04
Loss = 5.7741e-03, PNorm = 166.4910, GNorm = 0.1580, lr_0 = 3.0535e-04
Loss = 4.8703e-03, PNorm = 166.5004, GNorm = 0.5424, lr_0 = 3.0514e-04
Loss = 7.9802e-03, PNorm = 166.5075, GNorm = 0.3641, lr_0 = 3.0493e-04
Loss = 6.4092e-03, PNorm = 166.5164, GNorm = 0.0936, lr_0 = 3.0472e-04
Loss = 5.2150e-03, PNorm = 166.5273, GNorm = 0.2486, lr_0 = 3.0451e-04
Loss = 7.7867e-03, PNorm = 166.5375, GNorm = 0.4000, lr_0 = 3.0430e-04
Loss = 5.3631e-03, PNorm = 166.5497, GNorm = 0.1809, lr_0 = 3.0409e-04
Loss = 4.5283e-03, PNorm = 166.5599, GNorm = 0.1885, lr_0 = 3.0388e-04
Loss = 4.6909e-03, PNorm = 166.5722, GNorm = 0.4431, lr_0 = 3.0368e-04
Loss = 6.6571e-03, PNorm = 166.5833, GNorm = 0.2138, lr_0 = 3.0347e-04
Loss = 4.8442e-03, PNorm = 166.5964, GNorm = 0.1215, lr_0 = 3.0326e-04
Loss = 5.3555e-03, PNorm = 166.6068, GNorm = 0.1314, lr_0 = 3.0305e-04
Loss = 5.7393e-03, PNorm = 166.6169, GNorm = 0.1842, lr_0 = 3.0284e-04
Loss = 4.6941e-03, PNorm = 166.6264, GNorm = 0.2227, lr_0 = 3.0264e-04
Loss = 4.2997e-03, PNorm = 166.6361, GNorm = 0.1437, lr_0 = 3.0243e-04
Loss = 6.6013e-03, PNorm = 166.6478, GNorm = 0.1669, lr_0 = 3.0222e-04
Loss = 4.2058e-03, PNorm = 166.6592, GNorm = 0.1783, lr_0 = 3.0202e-04
Loss = 4.3030e-03, PNorm = 166.6734, GNorm = 0.1917, lr_0 = 3.0181e-04
Loss = 4.2333e-03, PNorm = 166.6841, GNorm = 0.1067, lr_0 = 3.0160e-04
Loss = 4.3869e-03, PNorm = 166.6902, GNorm = 0.3873, lr_0 = 3.0140e-04
Loss = 5.1898e-03, PNorm = 166.6982, GNorm = 0.3281, lr_0 = 3.0119e-04
Loss = 5.4115e-03, PNorm = 166.7061, GNorm = 0.1327, lr_0 = 3.0098e-04
Loss = 7.2747e-03, PNorm = 166.7148, GNorm = 0.0714, lr_0 = 3.0078e-04
Loss = 6.2506e-03, PNorm = 166.7248, GNorm = 0.2570, lr_0 = 3.0057e-04
Loss = 8.0613e-03, PNorm = 166.7378, GNorm = 0.1320, lr_0 = 3.0036e-04
Loss = 5.6964e-03, PNorm = 166.7509, GNorm = 0.1835, lr_0 = 3.0016e-04
Loss = 8.2009e-03, PNorm = 166.7626, GNorm = 0.2171, lr_0 = 2.9995e-04
Loss = 4.5079e-03, PNorm = 166.7721, GNorm = 0.3804, lr_0 = 2.9975e-04
Loss = 5.7645e-03, PNorm = 166.7821, GNorm = 0.2543, lr_0 = 2.9954e-04
Loss = 5.7356e-03, PNorm = 166.7940, GNorm = 0.5003, lr_0 = 2.9934e-04
Loss = 4.5095e-03, PNorm = 166.8045, GNorm = 0.3129, lr_0 = 2.9913e-04
Loss = 4.8600e-03, PNorm = 166.8174, GNorm = 0.2820, lr_0 = 2.9893e-04
Loss = 4.1718e-03, PNorm = 166.8289, GNorm = 0.1429, lr_0 = 2.9872e-04
Loss = 5.5217e-03, PNorm = 166.8386, GNorm = 0.4982, lr_0 = 2.9852e-04
Loss = 5.1260e-03, PNorm = 166.8480, GNorm = 0.1964, lr_0 = 2.9831e-04
Loss = 5.0024e-03, PNorm = 166.8577, GNorm = 0.1453, lr_0 = 2.9811e-04
Loss = 4.5099e-03, PNorm = 166.8692, GNorm = 0.3004, lr_0 = 2.9790e-04
Loss = 4.4882e-03, PNorm = 166.8799, GNorm = 0.3107, lr_0 = 2.9770e-04
Loss = 3.6405e-03, PNorm = 166.8949, GNorm = 0.1504, lr_0 = 2.9750e-04
Loss = 5.7975e-03, PNorm = 166.9053, GNorm = 0.3115, lr_0 = 2.9729e-04
Loss = 4.9110e-03, PNorm = 166.9153, GNorm = 0.1129, lr_0 = 2.9709e-04
Loss = 5.1448e-03, PNorm = 166.9249, GNorm = 0.1773, lr_0 = 2.9689e-04
Loss = 4.8134e-03, PNorm = 166.9340, GNorm = 0.1330, lr_0 = 2.9668e-04
Loss = 4.4000e-03, PNorm = 166.9433, GNorm = 0.1410, lr_0 = 2.9648e-04
Loss = 5.8734e-03, PNorm = 166.9568, GNorm = 0.2923, lr_0 = 2.9628e-04
Loss = 6.3120e-03, PNorm = 166.9684, GNorm = 0.2873, lr_0 = 2.9607e-04
Loss = 5.4211e-03, PNorm = 166.9788, GNorm = 0.2506, lr_0 = 2.9587e-04
Loss = 6.1002e-03, PNorm = 166.9877, GNorm = 0.5056, lr_0 = 2.9567e-04
Loss = 4.5317e-03, PNorm = 166.9968, GNorm = 0.3156, lr_0 = 2.9546e-04
Loss = 4.8796e-03, PNorm = 167.0059, GNorm = 0.0993, lr_0 = 2.9526e-04
Loss = 4.2646e-03, PNorm = 167.0178, GNorm = 0.1267, lr_0 = 2.9506e-04
Loss = 9.1693e-03, PNorm = 167.0286, GNorm = 0.2130, lr_0 = 2.9486e-04
Loss = 4.9905e-03, PNorm = 167.0389, GNorm = 0.3058, lr_0 = 2.9466e-04
Loss = 4.9624e-03, PNorm = 167.0533, GNorm = 0.1727, lr_0 = 2.9445e-04
Loss = 6.5952e-03, PNorm = 167.0665, GNorm = 0.0874, lr_0 = 2.9425e-04
Loss = 4.9142e-03, PNorm = 167.0783, GNorm = 0.3276, lr_0 = 2.9405e-04
Loss = 5.5436e-03, PNorm = 167.0885, GNorm = 0.1560, lr_0 = 2.9385e-04
Loss = 4.2363e-03, PNorm = 167.0980, GNorm = 0.0798, lr_0 = 2.9365e-04
Loss = 4.1013e-03, PNorm = 167.1068, GNorm = 0.1586, lr_0 = 2.9345e-04
Loss = 5.1269e-03, PNorm = 167.1166, GNorm = 0.2794, lr_0 = 2.9325e-04
Loss = 6.9060e-03, PNorm = 167.1258, GNorm = 0.3732, lr_0 = 2.9305e-04
Loss = 4.7323e-03, PNorm = 167.1344, GNorm = 0.1740, lr_0 = 2.9284e-04
Loss = 3.7570e-03, PNorm = 167.1477, GNorm = 0.3192, lr_0 = 2.9264e-04
Loss = 5.6684e-03, PNorm = 167.1596, GNorm = 0.1672, lr_0 = 2.9244e-04
Loss = 5.7925e-03, PNorm = 167.1715, GNorm = 0.1629, lr_0 = 2.9224e-04
Loss = 5.3472e-03, PNorm = 167.1841, GNorm = 0.1038, lr_0 = 2.9204e-04
Loss = 7.1340e-03, PNorm = 167.1955, GNorm = 0.1579, lr_0 = 2.9184e-04
Loss = 4.6568e-03, PNorm = 167.2090, GNorm = 0.1789, lr_0 = 2.9164e-04
Loss = 4.3350e-03, PNorm = 167.2208, GNorm = 0.1542, lr_0 = 2.9144e-04
Loss = 4.8046e-03, PNorm = 167.2343, GNorm = 0.2853, lr_0 = 2.9124e-04
Validation mae = 0.278701
Epoch 17
Loss = 4.3636e-03, PNorm = 167.2439, GNorm = 0.1577, lr_0 = 2.9104e-04
Loss = 4.8042e-03, PNorm = 167.2554, GNorm = 0.2120, lr_0 = 2.9084e-04
Loss = 5.8307e-03, PNorm = 167.2660, GNorm = 0.0626, lr_0 = 2.9065e-04
Loss = 4.6155e-03, PNorm = 167.2728, GNorm = 0.2513, lr_0 = 2.9045e-04
Loss = 4.2320e-03, PNorm = 167.2806, GNorm = 0.1511, lr_0 = 2.9025e-04
Loss = 4.1769e-03, PNorm = 167.2872, GNorm = 0.1628, lr_0 = 2.9005e-04
Loss = 3.9234e-03, PNorm = 167.2968, GNorm = 0.1604, lr_0 = 2.8985e-04
Loss = 4.6320e-03, PNorm = 167.3059, GNorm = 0.0836, lr_0 = 2.8965e-04
Loss = 4.5666e-03, PNorm = 167.3134, GNorm = 0.4797, lr_0 = 2.8945e-04
Loss = 4.3023e-03, PNorm = 167.3207, GNorm = 0.2160, lr_0 = 2.8925e-04
Loss = 3.5372e-03, PNorm = 167.3271, GNorm = 0.1378, lr_0 = 2.8906e-04
Loss = 4.7132e-03, PNorm = 167.3340, GNorm = 0.4974, lr_0 = 2.8886e-04
Loss = 4.3902e-03, PNorm = 167.3435, GNorm = 0.0804, lr_0 = 2.8866e-04
Loss = 4.4230e-03, PNorm = 167.3491, GNorm = 0.0672, lr_0 = 2.8846e-04
Loss = 4.4991e-03, PNorm = 167.3558, GNorm = 0.2456, lr_0 = 2.8826e-04
Loss = 5.7295e-03, PNorm = 167.3626, GNorm = 0.2643, lr_0 = 2.8807e-04
Loss = 4.3195e-03, PNorm = 167.3709, GNorm = 0.0926, lr_0 = 2.8787e-04
Loss = 4.5122e-03, PNorm = 167.3789, GNorm = 0.1693, lr_0 = 2.8767e-04
Loss = 4.4530e-03, PNorm = 167.3875, GNorm = 0.0842, lr_0 = 2.8748e-04
Loss = 4.9128e-03, PNorm = 167.3953, GNorm = 0.3242, lr_0 = 2.8728e-04
Loss = 4.9169e-03, PNorm = 167.4035, GNorm = 0.1934, lr_0 = 2.8708e-04
Loss = 6.5781e-03, PNorm = 167.4163, GNorm = 0.1958, lr_0 = 2.8689e-04
Loss = 5.6977e-03, PNorm = 167.4244, GNorm = 0.2191, lr_0 = 2.8669e-04
Loss = 4.0470e-03, PNorm = 167.4342, GNorm = 0.1601, lr_0 = 2.8649e-04
Loss = 4.3854e-03, PNorm = 167.4419, GNorm = 0.0693, lr_0 = 2.8630e-04
Loss = 4.8471e-03, PNorm = 167.4496, GNorm = 0.1593, lr_0 = 2.8610e-04
Loss = 5.2025e-03, PNorm = 167.4564, GNorm = 0.1121, lr_0 = 2.8590e-04
Loss = 4.4493e-03, PNorm = 167.4619, GNorm = 0.1062, lr_0 = 2.8571e-04
Loss = 5.6097e-03, PNorm = 167.4676, GNorm = 0.2099, lr_0 = 2.8551e-04
Loss = 8.7110e-03, PNorm = 167.4734, GNorm = 0.4624, lr_0 = 2.8532e-04
Loss = 3.5981e-03, PNorm = 167.4861, GNorm = 0.1483, lr_0 = 2.8512e-04
Loss = 4.1067e-03, PNorm = 167.4945, GNorm = 0.2211, lr_0 = 2.8493e-04
Loss = 4.1056e-03, PNorm = 167.5060, GNorm = 0.4448, lr_0 = 2.8473e-04
Loss = 4.2618e-03, PNorm = 167.5152, GNorm = 0.1393, lr_0 = 2.8454e-04
Loss = 4.8204e-03, PNorm = 167.5252, GNorm = 0.1345, lr_0 = 2.8434e-04
Loss = 4.5927e-03, PNorm = 167.5335, GNorm = 0.1900, lr_0 = 2.8415e-04
Loss = 5.0210e-03, PNorm = 167.5416, GNorm = 0.1382, lr_0 = 2.8395e-04
Loss = 4.3832e-03, PNorm = 167.5500, GNorm = 0.1176, lr_0 = 2.8376e-04
Loss = 3.5775e-03, PNorm = 167.5589, GNorm = 0.2439, lr_0 = 2.8356e-04
Loss = 4.6906e-03, PNorm = 167.5678, GNorm = 0.2270, lr_0 = 2.8337e-04
Loss = 6.6520e-03, PNorm = 167.5769, GNorm = 0.2636, lr_0 = 2.8317e-04
Loss = 3.9657e-03, PNorm = 167.5868, GNorm = 0.0798, lr_0 = 2.8298e-04
Loss = 6.3039e-03, PNorm = 167.5936, GNorm = 0.3875, lr_0 = 2.8279e-04
Loss = 3.6849e-03, PNorm = 167.6045, GNorm = 0.2061, lr_0 = 2.8259e-04
Loss = 4.9866e-03, PNorm = 167.6147, GNorm = 0.2093, lr_0 = 2.8240e-04
Loss = 4.9167e-03, PNorm = 167.6245, GNorm = 0.5226, lr_0 = 2.8221e-04
Loss = 4.9693e-03, PNorm = 167.6323, GNorm = 0.1960, lr_0 = 2.8201e-04
Loss = 5.5319e-03, PNorm = 167.6387, GNorm = 0.1608, lr_0 = 2.8182e-04
Loss = 5.2352e-03, PNorm = 167.6471, GNorm = 0.1315, lr_0 = 2.8163e-04
Loss = 3.7949e-03, PNorm = 167.6591, GNorm = 0.2020, lr_0 = 2.8143e-04
Loss = 3.4220e-03, PNorm = 167.6671, GNorm = 0.4888, lr_0 = 2.8124e-04
Loss = 6.5185e-03, PNorm = 167.6762, GNorm = 0.4493, lr_0 = 2.8105e-04
Loss = 4.1434e-03, PNorm = 167.6863, GNorm = 0.1527, lr_0 = 2.8085e-04
Loss = 5.4778e-03, PNorm = 167.6976, GNorm = 0.2691, lr_0 = 2.8066e-04
Loss = 7.9681e-03, PNorm = 167.7047, GNorm = 0.5653, lr_0 = 2.8047e-04
Loss = 4.6370e-03, PNorm = 167.7152, GNorm = 0.2283, lr_0 = 2.8028e-04
Loss = 4.1957e-03, PNorm = 167.7242, GNorm = 0.1235, lr_0 = 2.8009e-04
Loss = 6.1936e-03, PNorm = 167.7316, GNorm = 0.1288, lr_0 = 2.7989e-04
Loss = 4.8824e-03, PNorm = 167.7416, GNorm = 0.1079, lr_0 = 2.7970e-04
Loss = 4.0107e-03, PNorm = 167.7526, GNorm = 0.3079, lr_0 = 2.7951e-04
Loss = 5.5858e-03, PNorm = 167.7648, GNorm = 0.0958, lr_0 = 2.7932e-04
Loss = 4.2750e-03, PNorm = 167.7752, GNorm = 0.2066, lr_0 = 2.7913e-04
Loss = 3.6186e-03, PNorm = 167.7829, GNorm = 0.0847, lr_0 = 2.7894e-04
Loss = 3.3748e-03, PNorm = 167.7900, GNorm = 0.2062, lr_0 = 2.7875e-04
Loss = 4.4678e-03, PNorm = 167.7964, GNorm = 0.1684, lr_0 = 2.7855e-04
Loss = 4.0870e-03, PNorm = 167.8067, GNorm = 0.1246, lr_0 = 2.7836e-04
Loss = 3.9817e-03, PNorm = 167.8134, GNorm = 0.2293, lr_0 = 2.7817e-04
Loss = 3.2474e-03, PNorm = 167.8205, GNorm = 0.1478, lr_0 = 2.7798e-04
Loss = 3.6938e-03, PNorm = 167.8238, GNorm = 0.0791, lr_0 = 2.7779e-04
Loss = 3.8970e-03, PNorm = 167.8322, GNorm = 0.1278, lr_0 = 2.7760e-04
Loss = 4.9985e-03, PNorm = 167.8441, GNorm = 0.2222, lr_0 = 2.7741e-04
Loss = 4.7105e-03, PNorm = 167.8550, GNorm = 0.1188, lr_0 = 2.7722e-04
Loss = 4.8879e-03, PNorm = 167.8646, GNorm = 0.3795, lr_0 = 2.7703e-04
Loss = 7.2799e-03, PNorm = 167.8751, GNorm = 0.2042, lr_0 = 2.7684e-04
Loss = 5.2259e-03, PNorm = 167.8797, GNorm = 0.1254, lr_0 = 2.7665e-04
Loss = 6.2607e-03, PNorm = 167.8865, GNorm = 0.4896, lr_0 = 2.7646e-04
Loss = 3.5626e-03, PNorm = 167.8934, GNorm = 0.2142, lr_0 = 2.7627e-04
Loss = 6.2711e-03, PNorm = 167.9038, GNorm = 0.2434, lr_0 = 2.7608e-04
Loss = 5.5660e-03, PNorm = 167.9167, GNorm = 0.2141, lr_0 = 2.7590e-04
Loss = 5.0400e-03, PNorm = 167.9277, GNorm = 0.6056, lr_0 = 2.7571e-04
Loss = 4.7188e-03, PNorm = 167.9392, GNorm = 0.2607, lr_0 = 2.7552e-04
Loss = 4.8771e-03, PNorm = 167.9480, GNorm = 0.3063, lr_0 = 2.7533e-04
Loss = 4.5524e-03, PNorm = 167.9578, GNorm = 0.0881, lr_0 = 2.7514e-04
Loss = 4.5989e-03, PNorm = 167.9692, GNorm = 0.2468, lr_0 = 2.7495e-04
Loss = 4.7421e-03, PNorm = 167.9801, GNorm = 0.2766, lr_0 = 2.7476e-04
Loss = 4.5156e-03, PNorm = 167.9874, GNorm = 0.3080, lr_0 = 2.7457e-04
Loss = 5.1746e-03, PNorm = 167.9959, GNorm = 0.1831, lr_0 = 2.7439e-04
Loss = 4.6940e-03, PNorm = 168.0038, GNorm = 0.2626, lr_0 = 2.7420e-04
Loss = 4.1151e-03, PNorm = 168.0101, GNorm = 0.1757, lr_0 = 2.7401e-04
Loss = 4.6165e-03, PNorm = 168.0177, GNorm = 0.1223, lr_0 = 2.7382e-04
Loss = 6.0638e-03, PNorm = 168.0286, GNorm = 0.4847, lr_0 = 2.7364e-04
Loss = 4.2574e-03, PNorm = 168.0403, GNorm = 0.0889, lr_0 = 2.7345e-04
Loss = 5.8036e-03, PNorm = 168.0499, GNorm = 0.1054, lr_0 = 2.7326e-04
Loss = 3.5240e-03, PNorm = 168.0585, GNorm = 0.3507, lr_0 = 2.7307e-04
Loss = 5.1260e-03, PNorm = 168.0656, GNorm = 0.4736, lr_0 = 2.7289e-04
Loss = 3.9178e-03, PNorm = 168.0740, GNorm = 0.0725, lr_0 = 2.7270e-04
Loss = 4.4249e-03, PNorm = 168.0811, GNorm = 0.1429, lr_0 = 2.7251e-04
Loss = 3.5302e-03, PNorm = 168.0911, GNorm = 0.1844, lr_0 = 2.7233e-04
Loss = 4.2495e-03, PNorm = 168.1011, GNorm = 0.1532, lr_0 = 2.7214e-04
Loss = 5.9668e-03, PNorm = 168.1119, GNorm = 0.1907, lr_0 = 2.7195e-04
Loss = 3.8975e-03, PNorm = 168.1187, GNorm = 0.1515, lr_0 = 2.7177e-04
Loss = 5.0589e-03, PNorm = 168.1268, GNorm = 0.0594, lr_0 = 2.7158e-04
Loss = 4.0248e-03, PNorm = 168.1316, GNorm = 0.3133, lr_0 = 2.7139e-04
Loss = 4.8834e-03, PNorm = 168.1407, GNorm = 0.2051, lr_0 = 2.7121e-04
Loss = 3.7454e-03, PNorm = 168.1503, GNorm = 0.1474, lr_0 = 2.7102e-04
Loss = 4.6822e-03, PNorm = 168.1574, GNorm = 0.1823, lr_0 = 2.7084e-04
Loss = 6.2214e-03, PNorm = 168.1659, GNorm = 0.1642, lr_0 = 2.7065e-04
Loss = 4.1359e-03, PNorm = 168.1764, GNorm = 0.1722, lr_0 = 2.7047e-04
Loss = 3.9150e-03, PNorm = 168.1848, GNorm = 0.2217, lr_0 = 2.7028e-04
Loss = 5.6435e-03, PNorm = 168.1937, GNorm = 0.1790, lr_0 = 2.7010e-04
Loss = 3.9923e-03, PNorm = 168.2034, GNorm = 0.1726, lr_0 = 2.6991e-04
Loss = 4.3729e-03, PNorm = 168.2135, GNorm = 0.1140, lr_0 = 2.6973e-04
Loss = 4.0376e-03, PNorm = 168.2244, GNorm = 0.2648, lr_0 = 2.6954e-04
Loss = 3.2924e-03, PNorm = 168.2364, GNorm = 0.1084, lr_0 = 2.6936e-04
Loss = 4.1078e-03, PNorm = 168.2457, GNorm = 0.1622, lr_0 = 2.6917e-04
Loss = 4.6445e-03, PNorm = 168.2560, GNorm = 0.0900, lr_0 = 2.6899e-04
Loss = 5.7212e-03, PNorm = 168.2648, GNorm = 0.3326, lr_0 = 2.6880e-04
Loss = 3.1199e-03, PNorm = 168.2743, GNorm = 0.4464, lr_0 = 2.6862e-04
Loss = 3.8286e-03, PNorm = 168.2832, GNorm = 0.1902, lr_0 = 2.6844e-04
Loss = 4.6890e-03, PNorm = 168.2905, GNorm = 0.1199, lr_0 = 2.6825e-04
Validation mae = 0.278791
Epoch 18
Loss = 3.9453e-03, PNorm = 168.2961, GNorm = 0.0814, lr_0 = 2.6807e-04
Loss = 5.5762e-03, PNorm = 168.3014, GNorm = 0.2788, lr_0 = 2.6788e-04
Loss = 4.1237e-03, PNorm = 168.3050, GNorm = 0.1499, lr_0 = 2.6770e-04
Loss = 3.5666e-03, PNorm = 168.3106, GNorm = 0.1670, lr_0 = 2.6752e-04
Loss = 3.4082e-03, PNorm = 168.3177, GNorm = 0.1014, lr_0 = 2.6733e-04
Loss = 5.6875e-03, PNorm = 168.3248, GNorm = 0.2539, lr_0 = 2.6715e-04
Loss = 4.4335e-03, PNorm = 168.3289, GNorm = 0.1423, lr_0 = 2.6697e-04
Loss = 3.5164e-03, PNorm = 168.3370, GNorm = 0.1886, lr_0 = 2.6678e-04
Loss = 3.6543e-03, PNorm = 168.3451, GNorm = 0.0549, lr_0 = 2.6660e-04
Loss = 4.6391e-03, PNorm = 168.3542, GNorm = 0.1551, lr_0 = 2.6642e-04
Loss = 5.4668e-03, PNorm = 168.3617, GNorm = 0.0798, lr_0 = 2.6624e-04
Loss = 3.5017e-03, PNorm = 168.3708, GNorm = 0.1578, lr_0 = 2.6605e-04
Loss = 3.4932e-03, PNorm = 168.3775, GNorm = 0.2075, lr_0 = 2.6587e-04
Loss = 3.7006e-03, PNorm = 168.3848, GNorm = 0.3405, lr_0 = 2.6569e-04
Loss = 4.1334e-03, PNorm = 168.3933, GNorm = 0.1183, lr_0 = 2.6551e-04
Loss = 3.7434e-03, PNorm = 168.4008, GNorm = 0.1306, lr_0 = 2.6533e-04
Loss = 4.3092e-03, PNorm = 168.4082, GNorm = 0.0888, lr_0 = 2.6514e-04
Loss = 5.9164e-03, PNorm = 168.4151, GNorm = 0.3052, lr_0 = 2.6496e-04
Loss = 2.6565e-03, PNorm = 168.4215, GNorm = 0.1627, lr_0 = 2.6478e-04
Loss = 3.0017e-03, PNorm = 168.4290, GNorm = 0.1468, lr_0 = 2.6460e-04
Loss = 3.7720e-03, PNorm = 168.4353, GNorm = 0.1605, lr_0 = 2.6442e-04
Loss = 3.4757e-03, PNorm = 168.4411, GNorm = 0.1983, lr_0 = 2.6424e-04
Loss = 4.3422e-03, PNorm = 168.4497, GNorm = 0.1653, lr_0 = 2.6406e-04
Loss = 3.5817e-03, PNorm = 168.4571, GNorm = 0.1007, lr_0 = 2.6388e-04
Loss = 4.2299e-03, PNorm = 168.4664, GNorm = 0.1575, lr_0 = 2.6369e-04
Loss = 5.0398e-03, PNorm = 168.4757, GNorm = 0.1005, lr_0 = 2.6351e-04
Loss = 3.5331e-03, PNorm = 168.4829, GNorm = 0.1561, lr_0 = 2.6333e-04
Loss = 3.3239e-03, PNorm = 168.4876, GNorm = 0.1924, lr_0 = 2.6315e-04
Loss = 3.7812e-03, PNorm = 168.4929, GNorm = 0.0986, lr_0 = 2.6297e-04
Loss = 4.1066e-03, PNorm = 168.4992, GNorm = 0.2720, lr_0 = 2.6279e-04
Loss = 4.0864e-03, PNorm = 168.5066, GNorm = 0.0742, lr_0 = 2.6261e-04
Loss = 5.6874e-03, PNorm = 168.5151, GNorm = 0.1902, lr_0 = 2.6243e-04
Loss = 4.2776e-03, PNorm = 168.5237, GNorm = 0.2063, lr_0 = 2.6225e-04
Loss = 3.6439e-03, PNorm = 168.5314, GNorm = 0.1841, lr_0 = 2.6207e-04
Loss = 3.4547e-03, PNorm = 168.5399, GNorm = 0.1445, lr_0 = 2.6189e-04
Loss = 3.0228e-03, PNorm = 168.5447, GNorm = 0.1785, lr_0 = 2.6171e-04
Loss = 3.0149e-03, PNorm = 168.5528, GNorm = 0.0855, lr_0 = 2.6153e-04
Loss = 2.9595e-03, PNorm = 168.5609, GNorm = 0.3482, lr_0 = 2.6136e-04
Loss = 3.1569e-03, PNorm = 168.5673, GNorm = 0.1094, lr_0 = 2.6118e-04
Loss = 4.8972e-03, PNorm = 168.5723, GNorm = 0.1387, lr_0 = 2.6100e-04
Loss = 3.7541e-03, PNorm = 168.5775, GNorm = 0.3577, lr_0 = 2.6082e-04
Loss = 3.6214e-03, PNorm = 168.5855, GNorm = 0.1786, lr_0 = 2.6064e-04
Loss = 3.3108e-03, PNorm = 168.5932, GNorm = 0.2754, lr_0 = 2.6046e-04
Loss = 4.0828e-03, PNorm = 168.5976, GNorm = 0.1161, lr_0 = 2.6028e-04
Loss = 5.4822e-03, PNorm = 168.6087, GNorm = 0.0771, lr_0 = 2.6011e-04
Loss = 4.0573e-03, PNorm = 168.6179, GNorm = 0.2849, lr_0 = 2.5993e-04
Loss = 5.8238e-03, PNorm = 168.6230, GNorm = 0.0550, lr_0 = 2.5975e-04
Loss = 4.7334e-03, PNorm = 168.6288, GNorm = 0.3257, lr_0 = 2.5957e-04
Loss = 3.5591e-03, PNorm = 168.6340, GNorm = 0.2026, lr_0 = 2.5939e-04
Loss = 2.6947e-03, PNorm = 168.6424, GNorm = 0.1500, lr_0 = 2.5922e-04
Loss = 3.7005e-03, PNorm = 168.6518, GNorm = 0.4369, lr_0 = 2.5904e-04
Loss = 5.9268e-03, PNorm = 168.6627, GNorm = 0.2426, lr_0 = 2.5886e-04
Loss = 3.7366e-03, PNorm = 168.6713, GNorm = 0.1560, lr_0 = 2.5868e-04
Loss = 4.1883e-03, PNorm = 168.6783, GNorm = 0.0777, lr_0 = 2.5851e-04
Loss = 7.3659e-03, PNorm = 168.6873, GNorm = 0.1173, lr_0 = 2.5833e-04
Loss = 3.4385e-03, PNorm = 168.6954, GNorm = 0.0783, lr_0 = 2.5815e-04
Loss = 3.6354e-03, PNorm = 168.7017, GNorm = 0.1590, lr_0 = 2.5797e-04
Loss = 4.1203e-03, PNorm = 168.7069, GNorm = 0.1599, lr_0 = 2.5780e-04
Loss = 5.8855e-03, PNorm = 168.7153, GNorm = 0.0832, lr_0 = 2.5762e-04
Loss = 3.3142e-03, PNorm = 168.7207, GNorm = 0.1782, lr_0 = 2.5745e-04
Loss = 4.2930e-03, PNorm = 168.7284, GNorm = 0.1322, lr_0 = 2.5727e-04
Loss = 3.7585e-03, PNorm = 168.7371, GNorm = 0.1869, lr_0 = 2.5709e-04
Loss = 3.7709e-03, PNorm = 168.7455, GNorm = 0.1711, lr_0 = 2.5692e-04
Loss = 5.7429e-03, PNorm = 168.7544, GNorm = 0.2604, lr_0 = 2.5674e-04
Loss = 3.9619e-03, PNorm = 168.7630, GNorm = 0.1850, lr_0 = 2.5656e-04
Loss = 3.2929e-03, PNorm = 168.7730, GNorm = 0.1173, lr_0 = 2.5639e-04
Loss = 4.3356e-03, PNorm = 168.7805, GNorm = 0.2619, lr_0 = 2.5621e-04
Loss = 2.9888e-03, PNorm = 168.7880, GNorm = 0.1158, lr_0 = 2.5604e-04
Loss = 4.3241e-03, PNorm = 168.7954, GNorm = 0.3154, lr_0 = 2.5586e-04
Loss = 2.9742e-03, PNorm = 168.8039, GNorm = 0.2632, lr_0 = 2.5569e-04
Loss = 3.1342e-03, PNorm = 168.8080, GNorm = 0.1837, lr_0 = 2.5551e-04
Loss = 3.2647e-03, PNorm = 168.8152, GNorm = 0.0646, lr_0 = 2.5534e-04
Loss = 3.4433e-03, PNorm = 168.8221, GNorm = 0.0906, lr_0 = 2.5516e-04
Loss = 2.6965e-03, PNorm = 168.8306, GNorm = 0.1649, lr_0 = 2.5499e-04
Loss = 2.9998e-03, PNorm = 168.8371, GNorm = 0.0752, lr_0 = 2.5481e-04
Loss = 3.3019e-03, PNorm = 168.8412, GNorm = 0.3856, lr_0 = 2.5464e-04
Loss = 3.4269e-03, PNorm = 168.8504, GNorm = 0.2524, lr_0 = 2.5446e-04
Loss = 3.4385e-03, PNorm = 168.8599, GNorm = 0.1523, lr_0 = 2.5429e-04
Loss = 4.0720e-03, PNorm = 168.8663, GNorm = 0.1224, lr_0 = 2.5411e-04
Loss = 3.1633e-03, PNorm = 168.8703, GNorm = 0.1417, lr_0 = 2.5394e-04
Loss = 3.3119e-03, PNorm = 168.8770, GNorm = 0.2257, lr_0 = 2.5377e-04
Loss = 4.0195e-03, PNorm = 168.8873, GNorm = 0.2137, lr_0 = 2.5359e-04
Loss = 4.4814e-03, PNorm = 168.8949, GNorm = 0.3582, lr_0 = 2.5342e-04
Loss = 5.9230e-03, PNorm = 168.9036, GNorm = 0.1161, lr_0 = 2.5325e-04
Loss = 2.8971e-03, PNorm = 168.9091, GNorm = 0.1426, lr_0 = 2.5307e-04
Loss = 3.5338e-03, PNorm = 168.9155, GNorm = 0.0907, lr_0 = 2.5290e-04
Loss = 4.1508e-03, PNorm = 168.9236, GNorm = 0.3151, lr_0 = 2.5273e-04
Loss = 6.0774e-03, PNorm = 168.9309, GNorm = 0.1476, lr_0 = 2.5255e-04
Loss = 3.4409e-03, PNorm = 168.9398, GNorm = 0.0890, lr_0 = 2.5238e-04
Loss = 3.5456e-03, PNorm = 168.9479, GNorm = 0.0946, lr_0 = 2.5221e-04
Loss = 5.2138e-03, PNorm = 168.9544, GNorm = 0.3114, lr_0 = 2.5203e-04
Loss = 4.8466e-03, PNorm = 168.9615, GNorm = 0.2621, lr_0 = 2.5186e-04
Loss = 4.4076e-03, PNorm = 168.9695, GNorm = 0.3891, lr_0 = 2.5169e-04
Loss = 5.1878e-03, PNorm = 168.9797, GNorm = 0.3303, lr_0 = 2.5152e-04
Loss = 3.9188e-03, PNorm = 168.9878, GNorm = 0.1238, lr_0 = 2.5134e-04
Loss = 5.6341e-03, PNorm = 168.9960, GNorm = 0.1063, lr_0 = 2.5117e-04
Loss = 3.4036e-03, PNorm = 169.0037, GNorm = 0.2707, lr_0 = 2.5100e-04
Loss = 4.3183e-03, PNorm = 169.0133, GNorm = 0.1026, lr_0 = 2.5083e-04
Loss = 3.8455e-03, PNorm = 169.0212, GNorm = 0.0747, lr_0 = 2.5066e-04
Loss = 2.8339e-03, PNorm = 169.0267, GNorm = 0.1663, lr_0 = 2.5048e-04
Loss = 3.8754e-03, PNorm = 169.0347, GNorm = 0.0742, lr_0 = 2.5031e-04
Loss = 3.1482e-03, PNorm = 169.0413, GNorm = 0.1784, lr_0 = 2.5014e-04
Loss = 3.9515e-03, PNorm = 169.0490, GNorm = 0.0918, lr_0 = 2.4997e-04
Loss = 4.0792e-03, PNorm = 169.0554, GNorm = 0.0842, lr_0 = 2.4980e-04
Loss = 3.3134e-03, PNorm = 169.0621, GNorm = 0.0710, lr_0 = 2.4963e-04
Loss = 5.6375e-03, PNorm = 169.0692, GNorm = 0.1637, lr_0 = 2.4946e-04
Loss = 3.3176e-03, PNorm = 169.0769, GNorm = 0.1569, lr_0 = 2.4929e-04
Loss = 4.7577e-03, PNorm = 169.0856, GNorm = 0.1454, lr_0 = 2.4911e-04
Loss = 3.2948e-03, PNorm = 169.0934, GNorm = 0.1458, lr_0 = 2.4894e-04
Loss = 4.3274e-03, PNorm = 169.1030, GNorm = 0.1396, lr_0 = 2.4877e-04
Loss = 3.0141e-03, PNorm = 169.1111, GNorm = 0.1272, lr_0 = 2.4860e-04
Loss = 3.7467e-03, PNorm = 169.1190, GNorm = 0.2563, lr_0 = 2.4843e-04
Loss = 4.0583e-03, PNorm = 169.1261, GNorm = 0.2108, lr_0 = 2.4826e-04
Loss = 6.4953e-03, PNorm = 169.1329, GNorm = 0.1270, lr_0 = 2.4809e-04
Loss = 3.6371e-03, PNorm = 169.1411, GNorm = 0.3074, lr_0 = 2.4792e-04
Loss = 4.4687e-03, PNorm = 169.1494, GNorm = 0.2153, lr_0 = 2.4775e-04
Loss = 3.8452e-03, PNorm = 169.1559, GNorm = 0.2447, lr_0 = 2.4758e-04
Loss = 4.6442e-03, PNorm = 169.1621, GNorm = 0.2672, lr_0 = 2.4741e-04
Loss = 3.9637e-03, PNorm = 169.1676, GNorm = 0.2328, lr_0 = 2.4724e-04
Loss = 4.1523e-03, PNorm = 169.1754, GNorm = 0.0974, lr_0 = 2.4707e-04
Validation mae = 0.278316
Epoch 19
Loss = 4.3234e-03, PNorm = 169.1824, GNorm = 0.2990, lr_0 = 2.4690e-04
Loss = 3.5291e-03, PNorm = 169.1904, GNorm = 0.2698, lr_0 = 2.4674e-04
Loss = 2.8636e-03, PNorm = 169.1968, GNorm = 0.1957, lr_0 = 2.4657e-04
Loss = 3.2281e-03, PNorm = 169.2032, GNorm = 0.3278, lr_0 = 2.4640e-04
Loss = 3.0580e-03, PNorm = 169.2078, GNorm = 0.2070, lr_0 = 2.4623e-04
Loss = 3.0692e-03, PNorm = 169.2134, GNorm = 0.0984, lr_0 = 2.4606e-04
Loss = 2.8780e-03, PNorm = 169.2202, GNorm = 0.2268, lr_0 = 2.4589e-04
Loss = 2.5343e-03, PNorm = 169.2245, GNorm = 0.1956, lr_0 = 2.4572e-04
Loss = 2.9823e-03, PNorm = 169.2286, GNorm = 0.2176, lr_0 = 2.4556e-04
Loss = 2.4069e-03, PNorm = 169.2351, GNorm = 0.1125, lr_0 = 2.4539e-04
Loss = 3.1366e-03, PNorm = 169.2409, GNorm = 0.1518, lr_0 = 2.4522e-04
Loss = 3.1094e-03, PNorm = 169.2476, GNorm = 0.2194, lr_0 = 2.4505e-04
Loss = 4.9160e-03, PNorm = 169.2525, GNorm = 0.3024, lr_0 = 2.4488e-04
Loss = 2.9986e-03, PNorm = 169.2597, GNorm = 0.2086, lr_0 = 2.4472e-04
Loss = 2.9934e-03, PNorm = 169.2677, GNorm = 0.1375, lr_0 = 2.4455e-04
Loss = 2.8182e-03, PNorm = 169.2760, GNorm = 0.2235, lr_0 = 2.4438e-04
Loss = 3.1967e-03, PNorm = 169.2813, GNorm = 0.1357, lr_0 = 2.4421e-04
Loss = 2.7503e-03, PNorm = 169.2838, GNorm = 0.1365, lr_0 = 2.4405e-04
Loss = 3.6613e-03, PNorm = 169.2894, GNorm = 0.2195, lr_0 = 2.4388e-04
Loss = 4.9369e-03, PNorm = 169.2923, GNorm = 0.3880, lr_0 = 2.4371e-04
Loss = 2.9473e-03, PNorm = 169.2966, GNorm = 0.1457, lr_0 = 2.4354e-04
Loss = 2.7618e-03, PNorm = 169.3022, GNorm = 0.2385, lr_0 = 2.4338e-04
Loss = 3.0335e-03, PNorm = 169.3074, GNorm = 0.1890, lr_0 = 2.4321e-04
Loss = 3.2814e-03, PNorm = 169.3119, GNorm = 0.0866, lr_0 = 2.4304e-04
Loss = 5.2223e-03, PNorm = 169.3173, GNorm = 0.1107, lr_0 = 2.4288e-04
Loss = 2.8410e-03, PNorm = 169.3237, GNorm = 0.1655, lr_0 = 2.4271e-04
Loss = 2.3896e-03, PNorm = 169.3300, GNorm = 0.1331, lr_0 = 2.4254e-04
Loss = 3.5871e-03, PNorm = 169.3364, GNorm = 0.1263, lr_0 = 2.4238e-04
Loss = 3.4649e-03, PNorm = 169.3404, GNorm = 0.3141, lr_0 = 2.4221e-04
Loss = 3.2454e-03, PNorm = 169.3453, GNorm = 0.1267, lr_0 = 2.4205e-04
Loss = 3.8786e-03, PNorm = 169.3514, GNorm = 0.1497, lr_0 = 2.4188e-04
Loss = 4.6890e-03, PNorm = 169.3562, GNorm = 0.1282, lr_0 = 2.4171e-04
Loss = 3.0934e-03, PNorm = 169.3622, GNorm = 0.0769, lr_0 = 2.4155e-04
Loss = 3.0437e-03, PNorm = 169.3679, GNorm = 0.2562, lr_0 = 2.4138e-04
Loss = 2.3973e-03, PNorm = 169.3737, GNorm = 0.0464, lr_0 = 2.4122e-04
Loss = 3.0220e-03, PNorm = 169.3795, GNorm = 0.1850, lr_0 = 2.4105e-04
Loss = 6.2011e-03, PNorm = 169.3856, GNorm = 0.1236, lr_0 = 2.4089e-04
Loss = 2.8767e-03, PNorm = 169.3912, GNorm = 0.1934, lr_0 = 2.4072e-04
Loss = 3.0908e-03, PNorm = 169.3984, GNorm = 0.3390, lr_0 = 2.4056e-04
Loss = 4.3201e-03, PNorm = 169.4058, GNorm = 0.1544, lr_0 = 2.4039e-04
Loss = 3.4043e-03, PNorm = 169.4128, GNorm = 0.1391, lr_0 = 2.4023e-04
Loss = 2.7776e-03, PNorm = 169.4180, GNorm = 0.1423, lr_0 = 2.4006e-04
Loss = 2.7940e-03, PNorm = 169.4228, GNorm = 0.1022, lr_0 = 2.3990e-04
Loss = 3.7482e-03, PNorm = 169.4277, GNorm = 0.1471, lr_0 = 2.3974e-04
Loss = 3.4733e-03, PNorm = 169.4337, GNorm = 0.2572, lr_0 = 2.3957e-04
Loss = 2.4716e-03, PNorm = 169.4393, GNorm = 0.0518, lr_0 = 2.3941e-04
Loss = 6.4812e-03, PNorm = 169.4471, GNorm = 0.2754, lr_0 = 2.3924e-04
Loss = 2.8746e-03, PNorm = 169.4519, GNorm = 0.0806, lr_0 = 2.3908e-04
Loss = 4.0424e-03, PNorm = 169.4606, GNorm = 0.0546, lr_0 = 2.3892e-04
Loss = 2.5368e-03, PNorm = 169.4650, GNorm = 0.2429, lr_0 = 2.3875e-04
Loss = 3.7155e-03, PNorm = 169.4695, GNorm = 0.1401, lr_0 = 2.3859e-04
Loss = 2.8837e-03, PNorm = 169.4747, GNorm = 0.1216, lr_0 = 2.3842e-04
Loss = 2.9311e-03, PNorm = 169.4806, GNorm = 0.2158, lr_0 = 2.3826e-04
Loss = 2.9986e-03, PNorm = 169.4890, GNorm = 0.0864, lr_0 = 2.3810e-04
Loss = 3.8055e-03, PNorm = 169.4965, GNorm = 0.0940, lr_0 = 2.3794e-04
Loss = 6.1371e-03, PNorm = 169.5018, GNorm = 0.0813, lr_0 = 2.3777e-04
Loss = 4.0990e-03, PNorm = 169.5085, GNorm = 0.0688, lr_0 = 2.3761e-04
Loss = 3.6978e-03, PNorm = 169.5151, GNorm = 0.1884, lr_0 = 2.3745e-04
Loss = 2.7689e-03, PNorm = 169.5229, GNorm = 0.1476, lr_0 = 2.3728e-04
Loss = 4.2350e-03, PNorm = 169.5326, GNorm = 0.1089, lr_0 = 2.3712e-04
Loss = 2.6606e-03, PNorm = 169.5396, GNorm = 0.0865, lr_0 = 2.3696e-04
Loss = 3.3468e-03, PNorm = 169.5457, GNorm = 0.1556, lr_0 = 2.3680e-04
Loss = 2.6451e-03, PNorm = 169.5525, GNorm = 0.2147, lr_0 = 2.3663e-04
Loss = 3.3548e-03, PNorm = 169.5613, GNorm = 0.0851, lr_0 = 2.3647e-04
Loss = 3.3065e-03, PNorm = 169.5693, GNorm = 0.0943, lr_0 = 2.3631e-04
Loss = 2.8375e-03, PNorm = 169.5771, GNorm = 0.0553, lr_0 = 2.3615e-04
Loss = 4.5823e-03, PNorm = 169.5853, GNorm = 0.2311, lr_0 = 2.3599e-04
Loss = 4.4252e-03, PNorm = 169.5908, GNorm = 0.0860, lr_0 = 2.3582e-04
Loss = 2.7372e-03, PNorm = 169.5986, GNorm = 0.0687, lr_0 = 2.3566e-04
Loss = 3.7930e-03, PNorm = 169.6034, GNorm = 0.1344, lr_0 = 2.3550e-04
Loss = 3.0855e-03, PNorm = 169.6086, GNorm = 0.2386, lr_0 = 2.3534e-04
Loss = 3.1449e-03, PNorm = 169.6133, GNorm = 0.2369, lr_0 = 2.3518e-04
Loss = 5.0654e-03, PNorm = 169.6176, GNorm = 0.3272, lr_0 = 2.3502e-04
Loss = 4.7747e-03, PNorm = 169.6226, GNorm = 0.4358, lr_0 = 2.3486e-04
Loss = 4.8514e-03, PNorm = 169.6285, GNorm = 0.3210, lr_0 = 2.3470e-04
Loss = 3.2736e-03, PNorm = 169.6333, GNorm = 0.1047, lr_0 = 2.3454e-04
Loss = 4.9352e-03, PNorm = 169.6391, GNorm = 0.2835, lr_0 = 2.3437e-04
Loss = 2.7856e-03, PNorm = 169.6435, GNorm = 0.1764, lr_0 = 2.3421e-04
Loss = 3.8520e-03, PNorm = 169.6497, GNorm = 0.1523, lr_0 = 2.3405e-04
Loss = 3.4641e-03, PNorm = 169.6586, GNorm = 0.2456, lr_0 = 2.3389e-04
Loss = 2.9039e-03, PNorm = 169.6695, GNorm = 0.0773, lr_0 = 2.3373e-04
Loss = 5.1234e-03, PNorm = 169.6795, GNorm = 0.1618, lr_0 = 2.3357e-04
Loss = 3.4675e-03, PNorm = 169.6857, GNorm = 0.2206, lr_0 = 2.3341e-04
Loss = 2.5017e-03, PNorm = 169.6900, GNorm = 0.0598, lr_0 = 2.3325e-04
Loss = 2.8875e-03, PNorm = 169.6944, GNorm = 0.1514, lr_0 = 2.3309e-04
Loss = 4.1453e-03, PNorm = 169.6998, GNorm = 0.1697, lr_0 = 2.3293e-04
Loss = 2.6494e-03, PNorm = 169.7063, GNorm = 0.1692, lr_0 = 2.3277e-04
Loss = 3.0986e-03, PNorm = 169.7133, GNorm = 0.1889, lr_0 = 2.3261e-04
Loss = 3.6309e-03, PNorm = 169.7207, GNorm = 0.0734, lr_0 = 2.3246e-04
Loss = 2.8863e-03, PNorm = 169.7294, GNorm = 0.1150, lr_0 = 2.3230e-04
Loss = 2.9323e-03, PNorm = 169.7343, GNorm = 0.2345, lr_0 = 2.3214e-04
Loss = 3.2751e-03, PNorm = 169.7412, GNorm = 0.2756, lr_0 = 2.3198e-04
Loss = 3.5816e-03, PNorm = 169.7468, GNorm = 0.1236, lr_0 = 2.3182e-04
Loss = 2.5948e-03, PNorm = 169.7548, GNorm = 0.2402, lr_0 = 2.3166e-04
Loss = 4.0901e-03, PNorm = 169.7625, GNorm = 0.1693, lr_0 = 2.3150e-04
Loss = 3.5281e-03, PNorm = 169.7718, GNorm = 0.2155, lr_0 = 2.3134e-04
Loss = 2.2242e-03, PNorm = 169.7807, GNorm = 0.0903, lr_0 = 2.3118e-04
Loss = 2.2086e-03, PNorm = 169.7850, GNorm = 0.1220, lr_0 = 2.3103e-04
Loss = 2.6309e-03, PNorm = 169.7896, GNorm = 0.1891, lr_0 = 2.3087e-04
Loss = 3.1516e-03, PNorm = 169.7945, GNorm = 0.1466, lr_0 = 2.3071e-04
Loss = 3.1546e-03, PNorm = 169.8015, GNorm = 0.2445, lr_0 = 2.3055e-04
Loss = 2.5267e-03, PNorm = 169.8069, GNorm = 0.1442, lr_0 = 2.3039e-04
Loss = 2.6474e-03, PNorm = 169.8113, GNorm = 0.0613, lr_0 = 2.3024e-04
Loss = 3.0237e-03, PNorm = 169.8177, GNorm = 0.2225, lr_0 = 2.3008e-04
Loss = 4.9035e-03, PNorm = 169.8266, GNorm = 0.0636, lr_0 = 2.2992e-04
Loss = 3.4421e-03, PNorm = 169.8325, GNorm = 0.1256, lr_0 = 2.2976e-04
Loss = 3.2296e-03, PNorm = 169.8388, GNorm = 0.0688, lr_0 = 2.2961e-04
Loss = 3.3616e-03, PNorm = 169.8448, GNorm = 0.0637, lr_0 = 2.2945e-04
Loss = 3.1369e-03, PNorm = 169.8502, GNorm = 0.0752, lr_0 = 2.2929e-04
Loss = 4.3721e-03, PNorm = 169.8577, GNorm = 0.2329, lr_0 = 2.2913e-04
Loss = 3.0163e-03, PNorm = 169.8635, GNorm = 0.0651, lr_0 = 2.2898e-04
Loss = 3.1874e-03, PNorm = 169.8687, GNorm = 0.1000, lr_0 = 2.2882e-04
Loss = 4.3789e-03, PNorm = 169.8762, GNorm = 0.0790, lr_0 = 2.2866e-04
Loss = 4.8861e-03, PNorm = 169.8827, GNorm = 0.2478, lr_0 = 2.2851e-04
Loss = 4.1575e-03, PNorm = 169.8872, GNorm = 0.1424, lr_0 = 2.2835e-04
Loss = 3.0593e-03, PNorm = 169.8935, GNorm = 0.0997, lr_0 = 2.2819e-04
Loss = 4.9521e-03, PNorm = 169.8999, GNorm = 0.2152, lr_0 = 2.2804e-04
Loss = 5.1143e-03, PNorm = 169.9073, GNorm = 0.3223, lr_0 = 2.2788e-04
Loss = 2.4125e-03, PNorm = 169.9138, GNorm = 0.0771, lr_0 = 2.2773e-04
Loss = 2.8110e-03, PNorm = 169.9234, GNorm = 0.2715, lr_0 = 2.2757e-04
Validation mae = 0.278044
Epoch 20
Loss = 2.3171e-03, PNorm = 169.9286, GNorm = 0.2910, lr_0 = 2.2741e-04
Loss = 2.4149e-03, PNorm = 169.9346, GNorm = 0.4939, lr_0 = 2.2726e-04
Loss = 2.7706e-03, PNorm = 169.9375, GNorm = 0.2047, lr_0 = 2.2710e-04
Loss = 2.2394e-03, PNorm = 169.9420, GNorm = 0.1634, lr_0 = 2.2695e-04
Loss = 2.5282e-03, PNorm = 169.9452, GNorm = 0.1963, lr_0 = 2.2679e-04
Loss = 3.1265e-03, PNorm = 169.9479, GNorm = 0.0769, lr_0 = 2.2664e-04
Loss = 3.0941e-03, PNorm = 169.9503, GNorm = 0.1504, lr_0 = 2.2648e-04
Loss = 4.1564e-03, PNorm = 169.9550, GNorm = 0.1178, lr_0 = 2.2632e-04
Loss = 3.2846e-03, PNorm = 169.9610, GNorm = 0.2266, lr_0 = 2.2617e-04
Loss = 2.8619e-03, PNorm = 169.9675, GNorm = 0.1832, lr_0 = 2.2601e-04
Loss = 2.3039e-03, PNorm = 169.9755, GNorm = 0.1693, lr_0 = 2.2586e-04
Loss = 2.9362e-03, PNorm = 169.9810, GNorm = 0.1766, lr_0 = 2.2571e-04
Loss = 3.1798e-03, PNorm = 169.9875, GNorm = 0.2194, lr_0 = 2.2555e-04
Loss = 2.1442e-03, PNorm = 169.9920, GNorm = 0.2418, lr_0 = 2.2540e-04
Loss = 2.8980e-03, PNorm = 169.9977, GNorm = 0.0793, lr_0 = 2.2524e-04
Loss = 2.8086e-03, PNorm = 170.0023, GNorm = 0.1636, lr_0 = 2.2509e-04
Loss = 4.0368e-03, PNorm = 170.0065, GNorm = 0.1035, lr_0 = 2.2493e-04
Loss = 2.9605e-03, PNorm = 170.0096, GNorm = 0.1840, lr_0 = 2.2478e-04
Loss = 3.8946e-03, PNorm = 170.0120, GNorm = 0.2260, lr_0 = 2.2463e-04
Loss = 2.4615e-03, PNorm = 170.0183, GNorm = 0.1168, lr_0 = 2.2447e-04
Loss = 2.5472e-03, PNorm = 170.0249, GNorm = 0.1762, lr_0 = 2.2432e-04
Loss = 1.9547e-03, PNorm = 170.0301, GNorm = 0.0665, lr_0 = 2.2416e-04
Loss = 2.2594e-03, PNorm = 170.0334, GNorm = 0.0662, lr_0 = 2.2401e-04
Loss = 2.6526e-03, PNorm = 170.0370, GNorm = 0.0561, lr_0 = 2.2386e-04
Loss = 2.7159e-03, PNorm = 170.0402, GNorm = 0.1087, lr_0 = 2.2370e-04
Loss = 2.9109e-03, PNorm = 170.0436, GNorm = 0.0627, lr_0 = 2.2355e-04
Loss = 2.3875e-03, PNorm = 170.0483, GNorm = 0.1096, lr_0 = 2.2340e-04
Loss = 2.5861e-03, PNorm = 170.0543, GNorm = 0.1641, lr_0 = 2.2324e-04
Loss = 3.0545e-03, PNorm = 170.0578, GNorm = 0.0505, lr_0 = 2.2309e-04
Loss = 2.7798e-03, PNorm = 170.0625, GNorm = 0.1612, lr_0 = 2.2294e-04
Loss = 3.4645e-03, PNorm = 170.0668, GNorm = 0.2570, lr_0 = 2.2279e-04
Loss = 3.8698e-03, PNorm = 170.0715, GNorm = 0.2559, lr_0 = 2.2263e-04
Loss = 2.8944e-03, PNorm = 170.0786, GNorm = 0.0807, lr_0 = 2.2248e-04
Loss = 2.5994e-03, PNorm = 170.0848, GNorm = 0.0921, lr_0 = 2.2233e-04
Loss = 2.8368e-03, PNorm = 170.0880, GNorm = 0.1036, lr_0 = 2.2218e-04
Loss = 4.0247e-03, PNorm = 170.0925, GNorm = 0.0575, lr_0 = 2.2202e-04
Loss = 2.5777e-03, PNorm = 170.0996, GNorm = 0.1744, lr_0 = 2.2187e-04
Loss = 3.2942e-03, PNorm = 170.1047, GNorm = 0.3379, lr_0 = 2.2172e-04
Loss = 3.2033e-03, PNorm = 170.1100, GNorm = 0.1133, lr_0 = 2.2157e-04
Loss = 2.3631e-03, PNorm = 170.1171, GNorm = 0.1134, lr_0 = 2.2142e-04
Loss = 2.7043e-03, PNorm = 170.1216, GNorm = 0.2574, lr_0 = 2.2126e-04
Loss = 2.5074e-03, PNorm = 170.1251, GNorm = 0.1735, lr_0 = 2.2111e-04
Loss = 3.0125e-03, PNorm = 170.1284, GNorm = 0.1182, lr_0 = 2.2096e-04
Loss = 3.0546e-03, PNorm = 170.1323, GNorm = 0.1337, lr_0 = 2.2081e-04
Loss = 2.1074e-03, PNorm = 170.1382, GNorm = 0.1321, lr_0 = 2.2066e-04
Loss = 2.5833e-03, PNorm = 170.1440, GNorm = 0.2195, lr_0 = 2.2051e-04
Loss = 3.5751e-03, PNorm = 170.1516, GNorm = 0.1392, lr_0 = 2.2036e-04
Loss = 2.4135e-03, PNorm = 170.1551, GNorm = 0.1654, lr_0 = 2.2021e-04
Loss = 2.8081e-03, PNorm = 170.1617, GNorm = 0.1801, lr_0 = 2.2005e-04
Loss = 3.1811e-03, PNorm = 170.1684, GNorm = 0.0951, lr_0 = 2.1990e-04
Loss = 2.2456e-03, PNorm = 170.1733, GNorm = 0.2254, lr_0 = 2.1975e-04
Loss = 1.9409e-03, PNorm = 170.1782, GNorm = 0.0725, lr_0 = 2.1960e-04
Loss = 3.1284e-03, PNorm = 170.1858, GNorm = 0.0859, lr_0 = 2.1945e-04
Loss = 2.8628e-03, PNorm = 170.1919, GNorm = 0.2371, lr_0 = 2.1930e-04
Loss = 4.1930e-03, PNorm = 170.1961, GNorm = 0.1693, lr_0 = 2.1915e-04
Loss = 2.2167e-03, PNorm = 170.1970, GNorm = 0.1331, lr_0 = 2.1900e-04
Loss = 2.5246e-03, PNorm = 170.2004, GNorm = 0.2072, lr_0 = 2.1885e-04
Loss = 2.1617e-03, PNorm = 170.2074, GNorm = 0.1516, lr_0 = 2.1870e-04
Loss = 4.6441e-03, PNorm = 170.2140, GNorm = 0.0815, lr_0 = 2.1855e-04
Loss = 2.0547e-03, PNorm = 170.2199, GNorm = 0.1103, lr_0 = 2.1840e-04
Loss = 4.9764e-03, PNorm = 170.2242, GNorm = 0.1788, lr_0 = 2.1825e-04
Loss = 2.9298e-03, PNorm = 170.2302, GNorm = 0.2988, lr_0 = 2.1810e-04
Loss = 1.9316e-03, PNorm = 170.2350, GNorm = 0.1245, lr_0 = 2.1795e-04
Loss = 3.7650e-03, PNorm = 170.2384, GNorm = 0.2313, lr_0 = 2.1780e-04
Loss = 3.4980e-03, PNorm = 170.2437, GNorm = 0.1421, lr_0 = 2.1765e-04
Loss = 2.9112e-03, PNorm = 170.2502, GNorm = 0.0704, lr_0 = 2.1751e-04
Loss = 2.3671e-03, PNorm = 170.2588, GNorm = 0.1256, lr_0 = 2.1736e-04
Loss = 3.1334e-03, PNorm = 170.2640, GNorm = 0.1668, lr_0 = 2.1721e-04
Loss = 2.2614e-03, PNorm = 170.2694, GNorm = 0.1578, lr_0 = 2.1706e-04
Loss = 2.2031e-03, PNorm = 170.2734, GNorm = 0.1648, lr_0 = 2.1691e-04
Loss = 3.8364e-03, PNorm = 170.2772, GNorm = 0.1380, lr_0 = 2.1676e-04
Loss = 2.6093e-03, PNorm = 170.2848, GNorm = 0.1462, lr_0 = 2.1661e-04
Loss = 3.8643e-03, PNorm = 170.2902, GNorm = 0.3075, lr_0 = 2.1646e-04
Loss = 3.2014e-03, PNorm = 170.2951, GNorm = 0.0953, lr_0 = 2.1632e-04
Loss = 4.6826e-03, PNorm = 170.2992, GNorm = 0.1940, lr_0 = 2.1617e-04
Loss = 3.2161e-03, PNorm = 170.3037, GNorm = 0.1585, lr_0 = 2.1602e-04
Loss = 3.8409e-03, PNorm = 170.3069, GNorm = 0.2623, lr_0 = 2.1587e-04
Loss = 3.1011e-03, PNorm = 170.3126, GNorm = 0.1090, lr_0 = 2.1572e-04
Loss = 2.5163e-03, PNorm = 170.3194, GNorm = 0.0782, lr_0 = 2.1558e-04
Loss = 3.1679e-03, PNorm = 170.3263, GNorm = 0.2486, lr_0 = 2.1543e-04
Loss = 2.9647e-03, PNorm = 170.3319, GNorm = 0.1403, lr_0 = 2.1528e-04
Loss = 2.0999e-03, PNorm = 170.3362, GNorm = 0.1078, lr_0 = 2.1513e-04
Loss = 3.0513e-03, PNorm = 170.3398, GNorm = 0.2833, lr_0 = 2.1499e-04
Loss = 2.1864e-03, PNorm = 170.3449, GNorm = 0.1712, lr_0 = 2.1484e-04
Loss = 3.3024e-03, PNorm = 170.3504, GNorm = 0.5483, lr_0 = 2.1469e-04
Loss = 2.3631e-03, PNorm = 170.3558, GNorm = 0.1830, lr_0 = 2.1454e-04
Loss = 2.8656e-03, PNorm = 170.3596, GNorm = 0.3487, lr_0 = 2.1440e-04
Loss = 3.3791e-03, PNorm = 170.3633, GNorm = 0.2587, lr_0 = 2.1425e-04
Loss = 2.2743e-03, PNorm = 170.3694, GNorm = 0.1974, lr_0 = 2.1410e-04
Loss = 2.6565e-03, PNorm = 170.3749, GNorm = 0.0835, lr_0 = 2.1396e-04
Loss = 2.9868e-03, PNorm = 170.3816, GNorm = 0.0886, lr_0 = 2.1381e-04
Loss = 2.6087e-03, PNorm = 170.3868, GNorm = 0.1271, lr_0 = 2.1366e-04
Loss = 4.0726e-03, PNorm = 170.3932, GNorm = 0.1543, lr_0 = 2.1352e-04
Loss = 3.2926e-03, PNorm = 170.3994, GNorm = 0.2044, lr_0 = 2.1337e-04
Loss = 2.4866e-03, PNorm = 170.4042, GNorm = 0.1774, lr_0 = 2.1323e-04
Loss = 4.2953e-03, PNorm = 170.4083, GNorm = 0.4232, lr_0 = 2.1308e-04
Loss = 3.7155e-03, PNorm = 170.4144, GNorm = 0.3121, lr_0 = 2.1293e-04
Loss = 1.9878e-03, PNorm = 170.4190, GNorm = 0.2589, lr_0 = 2.1279e-04
Loss = 3.6236e-03, PNorm = 170.4240, GNorm = 0.2119, lr_0 = 2.1264e-04
Loss = 3.0068e-03, PNorm = 170.4295, GNorm = 0.1805, lr_0 = 2.1250e-04
Loss = 3.4602e-03, PNorm = 170.4341, GNorm = 0.1079, lr_0 = 2.1235e-04
Loss = 4.3639e-03, PNorm = 170.4382, GNorm = 0.1310, lr_0 = 2.1221e-04
Loss = 3.2938e-03, PNorm = 170.4444, GNorm = 0.1368, lr_0 = 2.1206e-04
Loss = 4.4323e-03, PNorm = 170.4526, GNorm = 0.0813, lr_0 = 2.1191e-04
Loss = 2.1352e-03, PNorm = 170.4593, GNorm = 0.1182, lr_0 = 2.1177e-04
Loss = 2.2675e-03, PNorm = 170.4659, GNorm = 0.2263, lr_0 = 2.1162e-04
Loss = 4.2216e-03, PNorm = 170.4719, GNorm = 0.0740, lr_0 = 2.1148e-04
Loss = 2.1041e-03, PNorm = 170.4769, GNorm = 0.1843, lr_0 = 2.1133e-04
Loss = 2.1477e-03, PNorm = 170.4808, GNorm = 0.1633, lr_0 = 2.1119e-04
Loss = 4.6033e-03, PNorm = 170.4883, GNorm = 0.2429, lr_0 = 2.1104e-04
Loss = 2.4087e-03, PNorm = 170.4924, GNorm = 0.0606, lr_0 = 2.1090e-04
Loss = 3.1016e-03, PNorm = 170.4971, GNorm = 0.0759, lr_0 = 2.1076e-04
Loss = 6.6493e-03, PNorm = 170.5005, GNorm = 0.1648, lr_0 = 2.1061e-04
Loss = 2.4430e-03, PNorm = 170.5073, GNorm = 0.2582, lr_0 = 2.1047e-04
Loss = 2.5497e-03, PNorm = 170.5128, GNorm = 0.1766, lr_0 = 2.1032e-04
Loss = 2.6994e-03, PNorm = 170.5197, GNorm = 0.0965, lr_0 = 2.1018e-04
Loss = 3.1202e-03, PNorm = 170.5245, GNorm = 0.1299, lr_0 = 2.1003e-04
Loss = 3.2845e-03, PNorm = 170.5320, GNorm = 0.1875, lr_0 = 2.0989e-04
Loss = 4.5088e-03, PNorm = 170.5383, GNorm = 0.2517, lr_0 = 2.0975e-04
Loss = 5.9594e-03, PNorm = 170.5440, GNorm = 0.1775, lr_0 = 2.0960e-04
Validation mae = 0.278078
Epoch 21
Loss = 3.3279e-03, PNorm = 170.5489, GNorm = 0.2308, lr_0 = 2.0946e-04
Loss = 4.0260e-03, PNorm = 170.5529, GNorm = 0.1721, lr_0 = 2.0932e-04
Loss = 2.2980e-03, PNorm = 170.5568, GNorm = 0.0740, lr_0 = 2.0917e-04
Loss = 2.9486e-03, PNorm = 170.5595, GNorm = 0.0875, lr_0 = 2.0903e-04
Loss = 2.6918e-03, PNorm = 170.5636, GNorm = 0.0761, lr_0 = 2.0889e-04
Loss = 2.9364e-03, PNorm = 170.5688, GNorm = 0.1574, lr_0 = 2.0874e-04
Loss = 2.1560e-03, PNorm = 170.5716, GNorm = 0.0655, lr_0 = 2.0860e-04
Loss = 4.1497e-03, PNorm = 170.5769, GNorm = 0.2124, lr_0 = 2.0846e-04
Loss = 2.6295e-03, PNorm = 170.5821, GNorm = 0.2692, lr_0 = 2.0831e-04
Loss = 3.9162e-03, PNorm = 170.5898, GNorm = 0.1263, lr_0 = 2.0817e-04
Loss = 2.2661e-03, PNorm = 170.5954, GNorm = 0.1156, lr_0 = 2.0803e-04
Loss = 2.2968e-03, PNorm = 170.6002, GNorm = 0.1038, lr_0 = 2.0789e-04
Loss = 2.1859e-03, PNorm = 170.6043, GNorm = 0.2401, lr_0 = 2.0774e-04
Loss = 4.5810e-03, PNorm = 170.6080, GNorm = 0.0929, lr_0 = 2.0760e-04
Loss = 2.5786e-03, PNorm = 170.6127, GNorm = 0.1711, lr_0 = 2.0746e-04
Loss = 3.2302e-03, PNorm = 170.6167, GNorm = 0.0553, lr_0 = 2.0732e-04
Loss = 2.1879e-03, PNorm = 170.6226, GNorm = 0.2405, lr_0 = 2.0718e-04
Loss = 2.5485e-03, PNorm = 170.6274, GNorm = 0.1295, lr_0 = 2.0703e-04
Loss = 1.9346e-03, PNorm = 170.6332, GNorm = 0.1281, lr_0 = 2.0689e-04
Loss = 2.0001e-03, PNorm = 170.6378, GNorm = 0.1386, lr_0 = 2.0675e-04
Loss = 1.9958e-03, PNorm = 170.6425, GNorm = 0.0542, lr_0 = 2.0661e-04
Loss = 1.9529e-03, PNorm = 170.6459, GNorm = 0.0592, lr_0 = 2.0647e-04
Loss = 2.3692e-03, PNorm = 170.6487, GNorm = 0.2292, lr_0 = 2.0633e-04
Loss = 2.1187e-03, PNorm = 170.6511, GNorm = 0.1462, lr_0 = 2.0618e-04
Loss = 2.5643e-03, PNorm = 170.6569, GNorm = 0.1087, lr_0 = 2.0604e-04
Loss = 2.3683e-03, PNorm = 170.6614, GNorm = 0.1376, lr_0 = 2.0590e-04
Loss = 3.2909e-03, PNorm = 170.6636, GNorm = 0.1996, lr_0 = 2.0576e-04
Loss = 2.7985e-03, PNorm = 170.6663, GNorm = 0.1185, lr_0 = 2.0562e-04
Loss = 2.9460e-03, PNorm = 170.6692, GNorm = 0.1198, lr_0 = 2.0548e-04
Loss = 3.9456e-03, PNorm = 170.6727, GNorm = 0.1181, lr_0 = 2.0534e-04
Loss = 2.1616e-03, PNorm = 170.6794, GNorm = 0.2557, lr_0 = 2.0520e-04
Loss = 1.7223e-03, PNorm = 170.6840, GNorm = 0.1032, lr_0 = 2.0506e-04
Loss = 1.9454e-03, PNorm = 170.6873, GNorm = 0.0502, lr_0 = 2.0492e-04
Loss = 3.6853e-03, PNorm = 170.6903, GNorm = 0.1156, lr_0 = 2.0478e-04
Loss = 1.8749e-03, PNorm = 170.6957, GNorm = 0.2917, lr_0 = 2.0464e-04
Loss = 2.0130e-03, PNorm = 170.7000, GNorm = 0.0698, lr_0 = 2.0450e-04
Loss = 2.9127e-03, PNorm = 170.7042, GNorm = 0.1412, lr_0 = 2.0436e-04
Loss = 2.3701e-03, PNorm = 170.7073, GNorm = 0.0698, lr_0 = 2.0422e-04
Loss = 1.9304e-03, PNorm = 170.7115, GNorm = 0.1206, lr_0 = 2.0408e-04
Loss = 1.9993e-03, PNorm = 170.7150, GNorm = 0.0968, lr_0 = 2.0394e-04
Loss = 2.8597e-03, PNorm = 170.7191, GNorm = 0.1368, lr_0 = 2.0380e-04
Loss = 4.0823e-03, PNorm = 170.7262, GNorm = 0.0559, lr_0 = 2.0366e-04
Loss = 4.4867e-03, PNorm = 170.7308, GNorm = 0.3216, lr_0 = 2.0352e-04
Loss = 4.4667e-03, PNorm = 170.7343, GNorm = 0.1178, lr_0 = 2.0338e-04
Loss = 3.1147e-03, PNorm = 170.7381, GNorm = 0.2723, lr_0 = 2.0324e-04
Loss = 1.8650e-03, PNorm = 170.7447, GNorm = 0.1290, lr_0 = 2.0310e-04
Loss = 3.2222e-03, PNorm = 170.7510, GNorm = 0.1114, lr_0 = 2.0296e-04
Loss = 3.0293e-03, PNorm = 170.7549, GNorm = 0.0676, lr_0 = 2.0282e-04
Loss = 1.9094e-03, PNorm = 170.7570, GNorm = 0.1873, lr_0 = 2.0268e-04
Loss = 2.6497e-03, PNorm = 170.7616, GNorm = 0.0480, lr_0 = 2.0254e-04
Loss = 2.1596e-03, PNorm = 170.7680, GNorm = 0.1366, lr_0 = 2.0240e-04
Loss = 1.8146e-03, PNorm = 170.7708, GNorm = 0.1364, lr_0 = 2.0227e-04
Loss = 2.4821e-03, PNorm = 170.7756, GNorm = 0.1096, lr_0 = 2.0213e-04
Loss = 2.5842e-03, PNorm = 170.7795, GNorm = 0.1464, lr_0 = 2.0199e-04
Loss = 2.1304e-03, PNorm = 170.7853, GNorm = 0.2808, lr_0 = 2.0185e-04
Loss = 2.0083e-03, PNorm = 170.7877, GNorm = 0.2033, lr_0 = 2.0171e-04
Loss = 2.1440e-03, PNorm = 170.7934, GNorm = 0.1359, lr_0 = 2.0157e-04
Loss = 2.0542e-03, PNorm = 170.7962, GNorm = 0.1158, lr_0 = 2.0144e-04
Loss = 4.2705e-03, PNorm = 170.7998, GNorm = 0.3719, lr_0 = 2.0130e-04
Loss = 1.7693e-03, PNorm = 170.8040, GNorm = 0.0932, lr_0 = 2.0116e-04
Loss = 2.3325e-03, PNorm = 170.8097, GNorm = 0.2460, lr_0 = 2.0102e-04
Loss = 1.9698e-03, PNorm = 170.8151, GNorm = 0.1108, lr_0 = 2.0088e-04
Loss = 4.1290e-03, PNorm = 170.8201, GNorm = 0.4810, lr_0 = 2.0075e-04
Loss = 3.7675e-03, PNorm = 170.8234, GNorm = 0.1402, lr_0 = 2.0061e-04
Loss = 2.2790e-03, PNorm = 170.8275, GNorm = 0.2514, lr_0 = 2.0047e-04
Loss = 2.8719e-03, PNorm = 170.8314, GNorm = 0.0559, lr_0 = 2.0033e-04
Loss = 2.0142e-03, PNorm = 170.8349, GNorm = 0.1074, lr_0 = 2.0020e-04
Loss = 2.3082e-03, PNorm = 170.8390, GNorm = 0.1025, lr_0 = 2.0006e-04
Loss = 1.7872e-03, PNorm = 170.8444, GNorm = 0.0691, lr_0 = 1.9992e-04
Loss = 1.8251e-03, PNorm = 170.8505, GNorm = 0.0387, lr_0 = 1.9979e-04
Loss = 3.4611e-03, PNorm = 170.8555, GNorm = 0.1534, lr_0 = 1.9965e-04
Loss = 2.7624e-03, PNorm = 170.8574, GNorm = 0.0886, lr_0 = 1.9951e-04
Loss = 1.9928e-03, PNorm = 170.8612, GNorm = 0.0661, lr_0 = 1.9938e-04
Loss = 2.2024e-03, PNorm = 170.8645, GNorm = 0.1898, lr_0 = 1.9924e-04
Loss = 2.5079e-03, PNorm = 170.8694, GNorm = 0.2291, lr_0 = 1.9910e-04
Loss = 2.1170e-03, PNorm = 170.8754, GNorm = 0.1522, lr_0 = 1.9897e-04
Loss = 2.9093e-03, PNorm = 170.8805, GNorm = 0.1156, lr_0 = 1.9883e-04
Loss = 1.8061e-03, PNorm = 170.8836, GNorm = 0.0631, lr_0 = 1.9869e-04
Loss = 3.4921e-03, PNorm = 170.8864, GNorm = 0.1114, lr_0 = 1.9856e-04
Loss = 2.4237e-03, PNorm = 170.8884, GNorm = 0.1836, lr_0 = 1.9842e-04
Loss = 2.0417e-03, PNorm = 170.8919, GNorm = 0.1919, lr_0 = 1.9829e-04
Loss = 2.3677e-03, PNorm = 170.8945, GNorm = 0.2387, lr_0 = 1.9815e-04
Loss = 3.0341e-03, PNorm = 170.8966, GNorm = 0.2494, lr_0 = 1.9801e-04
Loss = 3.6996e-03, PNorm = 170.8995, GNorm = 0.2404, lr_0 = 1.9788e-04
Loss = 2.3089e-03, PNorm = 170.9037, GNorm = 0.2539, lr_0 = 1.9774e-04
Loss = 3.2563e-03, PNorm = 170.9072, GNorm = 0.1618, lr_0 = 1.9761e-04
Loss = 2.6933e-03, PNorm = 170.9144, GNorm = 0.1734, lr_0 = 1.9747e-04
Loss = 3.0230e-03, PNorm = 170.9200, GNorm = 0.1118, lr_0 = 1.9734e-04
Loss = 1.9982e-03, PNorm = 170.9260, GNorm = 0.0774, lr_0 = 1.9720e-04
Loss = 3.1406e-03, PNorm = 170.9322, GNorm = 0.1137, lr_0 = 1.9707e-04
Loss = 1.6836e-03, PNorm = 170.9361, GNorm = 0.2495, lr_0 = 1.9693e-04
Loss = 1.8132e-03, PNorm = 170.9398, GNorm = 0.2056, lr_0 = 1.9680e-04
Loss = 4.1226e-03, PNorm = 170.9437, GNorm = 0.1973, lr_0 = 1.9666e-04
Loss = 2.7296e-03, PNorm = 170.9480, GNorm = 0.0894, lr_0 = 1.9653e-04
Loss = 3.7292e-03, PNorm = 170.9532, GNorm = 0.1709, lr_0 = 1.9639e-04
Loss = 2.1101e-03, PNorm = 170.9595, GNorm = 0.0844, lr_0 = 1.9626e-04
Loss = 1.9352e-03, PNorm = 170.9632, GNorm = 0.0807, lr_0 = 1.9612e-04
Loss = 2.2012e-03, PNorm = 170.9671, GNorm = 0.0710, lr_0 = 1.9599e-04
Loss = 4.7851e-03, PNorm = 170.9736, GNorm = 0.3693, lr_0 = 1.9585e-04
Loss = 4.6894e-03, PNorm = 170.9789, GNorm = 0.1268, lr_0 = 1.9572e-04
Loss = 4.4717e-03, PNorm = 170.9835, GNorm = 0.2614, lr_0 = 1.9559e-04
Loss = 2.6447e-03, PNorm = 170.9872, GNorm = 0.2094, lr_0 = 1.9545e-04
Loss = 2.9801e-03, PNorm = 170.9904, GNorm = 0.1588, lr_0 = 1.9532e-04
Loss = 2.6409e-03, PNorm = 170.9955, GNorm = 0.4475, lr_0 = 1.9518e-04
Loss = 2.5514e-03, PNorm = 170.9998, GNorm = 0.1487, lr_0 = 1.9505e-04
Loss = 1.8944e-03, PNorm = 171.0050, GNorm = 0.1315, lr_0 = 1.9492e-04
Loss = 2.4573e-03, PNorm = 171.0106, GNorm = 0.0770, lr_0 = 1.9478e-04
Loss = 2.6099e-03, PNorm = 171.0147, GNorm = 0.0622, lr_0 = 1.9465e-04
Loss = 3.7664e-03, PNorm = 171.0183, GNorm = 0.1648, lr_0 = 1.9452e-04
Loss = 2.3238e-03, PNorm = 171.0205, GNorm = 0.2827, lr_0 = 1.9438e-04
Loss = 2.5111e-03, PNorm = 171.0239, GNorm = 0.0950, lr_0 = 1.9425e-04
Loss = 2.0820e-03, PNorm = 171.0270, GNorm = 0.1316, lr_0 = 1.9412e-04
Loss = 2.4174e-03, PNorm = 171.0299, GNorm = 0.2167, lr_0 = 1.9398e-04
Loss = 1.8949e-03, PNorm = 171.0336, GNorm = 0.0557, lr_0 = 1.9385e-04
Loss = 3.7165e-03, PNorm = 171.0400, GNorm = 0.1420, lr_0 = 1.9372e-04
Loss = 3.6236e-03, PNorm = 171.0474, GNorm = 0.1365, lr_0 = 1.9359e-04
Loss = 3.0408e-03, PNorm = 171.0531, GNorm = 0.1145, lr_0 = 1.9345e-04
Loss = 4.0796e-03, PNorm = 171.0590, GNorm = 0.1505, lr_0 = 1.9332e-04
Loss = 2.8663e-03, PNorm = 171.0639, GNorm = 0.1790, lr_0 = 1.9319e-04
Loss = 2.6268e-03, PNorm = 171.0689, GNorm = 0.2485, lr_0 = 1.9306e-04
Validation mae = 0.278081
Epoch 22
Loss = 2.2063e-03, PNorm = 171.0732, GNorm = 0.1828, lr_0 = 1.9292e-04
Loss = 1.5442e-03, PNorm = 171.0760, GNorm = 0.1647, lr_0 = 1.9279e-04
Loss = 3.2882e-03, PNorm = 171.0793, GNorm = 0.1835, lr_0 = 1.9266e-04
Loss = 1.9460e-03, PNorm = 171.0845, GNorm = 0.2751, lr_0 = 1.9253e-04
Loss = 2.1505e-03, PNorm = 171.0880, GNorm = 0.1616, lr_0 = 1.9240e-04
Loss = 2.2000e-03, PNorm = 171.0928, GNorm = 0.0880, lr_0 = 1.9226e-04
Loss = 1.4893e-03, PNorm = 171.0953, GNorm = 0.0592, lr_0 = 1.9213e-04
Loss = 3.9568e-03, PNorm = 171.0989, GNorm = 0.4274, lr_0 = 1.9200e-04
Loss = 1.8688e-03, PNorm = 171.1009, GNorm = 0.1357, lr_0 = 1.9187e-04
Loss = 2.1085e-03, PNorm = 171.1040, GNorm = 0.2170, lr_0 = 1.9174e-04
Loss = 3.1273e-03, PNorm = 171.1077, GNorm = 0.2293, lr_0 = 1.9161e-04
Loss = 3.2054e-03, PNorm = 171.1116, GNorm = 0.2052, lr_0 = 1.9148e-04
Loss = 1.5641e-03, PNorm = 171.1152, GNorm = 0.0971, lr_0 = 1.9134e-04
Loss = 1.5967e-03, PNorm = 171.1188, GNorm = 0.1953, lr_0 = 1.9121e-04
Loss = 1.6190e-03, PNorm = 171.1211, GNorm = 0.0563, lr_0 = 1.9108e-04
Loss = 2.5255e-03, PNorm = 171.1234, GNorm = 0.1289, lr_0 = 1.9095e-04
Loss = 1.7081e-03, PNorm = 171.1267, GNorm = 0.0832, lr_0 = 1.9082e-04
Loss = 2.3386e-03, PNorm = 171.1300, GNorm = 0.2878, lr_0 = 1.9069e-04
Loss = 2.1460e-03, PNorm = 171.1325, GNorm = 0.1772, lr_0 = 1.9056e-04
Loss = 1.9291e-03, PNorm = 171.1342, GNorm = 0.2240, lr_0 = 1.9043e-04
Loss = 1.8946e-03, PNorm = 171.1381, GNorm = 0.1460, lr_0 = 1.9030e-04
Loss = 1.7884e-03, PNorm = 171.1410, GNorm = 0.1832, lr_0 = 1.9017e-04
Loss = 1.7809e-03, PNorm = 171.1456, GNorm = 0.0830, lr_0 = 1.9004e-04
Loss = 1.4478e-03, PNorm = 171.1483, GNorm = 0.1320, lr_0 = 1.8991e-04
Loss = 2.9333e-03, PNorm = 171.1505, GNorm = 0.1065, lr_0 = 1.8978e-04
Loss = 2.3611e-03, PNorm = 171.1533, GNorm = 0.1813, lr_0 = 1.8965e-04
Loss = 2.7062e-03, PNorm = 171.1585, GNorm = 0.0543, lr_0 = 1.8952e-04
Loss = 3.2145e-03, PNorm = 171.1621, GNorm = 0.0847, lr_0 = 1.8939e-04
Loss = 2.4614e-03, PNorm = 171.1657, GNorm = 0.1345, lr_0 = 1.8926e-04
Loss = 2.7634e-03, PNorm = 171.1678, GNorm = 0.1717, lr_0 = 1.8913e-04
Loss = 1.5514e-03, PNorm = 171.1724, GNorm = 0.0445, lr_0 = 1.8900e-04
Loss = 2.5088e-03, PNorm = 171.1760, GNorm = 0.2061, lr_0 = 1.8887e-04
Loss = 3.4784e-03, PNorm = 171.1793, GNorm = 0.0809, lr_0 = 1.8874e-04
Loss = 2.2205e-03, PNorm = 171.1836, GNorm = 0.1506, lr_0 = 1.8861e-04
Loss = 3.0387e-03, PNorm = 171.1892, GNorm = 0.1290, lr_0 = 1.8848e-04
Loss = 1.6345e-03, PNorm = 171.1928, GNorm = 0.1869, lr_0 = 1.8835e-04
Loss = 2.4045e-03, PNorm = 171.1965, GNorm = 0.1073, lr_0 = 1.8822e-04
Loss = 1.7800e-03, PNorm = 171.2001, GNorm = 0.1356, lr_0 = 1.8809e-04
Loss = 1.6198e-03, PNorm = 171.2034, GNorm = 0.0578, lr_0 = 1.8797e-04
Loss = 3.2720e-03, PNorm = 171.2076, GNorm = 0.1624, lr_0 = 1.8784e-04
Loss = 1.6738e-03, PNorm = 171.2111, GNorm = 0.1648, lr_0 = 1.8771e-04
Loss = 3.4494e-03, PNorm = 171.2140, GNorm = 0.0913, lr_0 = 1.8758e-04
Loss = 1.5543e-03, PNorm = 171.2171, GNorm = 0.1375, lr_0 = 1.8745e-04
Loss = 1.8466e-03, PNorm = 171.2208, GNorm = 0.1402, lr_0 = 1.8732e-04
Loss = 2.2258e-03, PNorm = 171.2255, GNorm = 0.1395, lr_0 = 1.8719e-04
Loss = 2.0169e-03, PNorm = 171.2299, GNorm = 0.0805, lr_0 = 1.8707e-04
Loss = 2.7524e-03, PNorm = 171.2333, GNorm = 0.5025, lr_0 = 1.8694e-04
Loss = 3.1560e-03, PNorm = 171.2383, GNorm = 0.1002, lr_0 = 1.8681e-04
Loss = 2.9249e-03, PNorm = 171.2418, GNorm = 0.2548, lr_0 = 1.8668e-04
Loss = 2.6549e-03, PNorm = 171.2459, GNorm = 0.0828, lr_0 = 1.8655e-04
Loss = 1.7768e-03, PNorm = 171.2492, GNorm = 0.1641, lr_0 = 1.8643e-04
Loss = 3.5216e-03, PNorm = 171.2529, GNorm = 0.5969, lr_0 = 1.8630e-04
Loss = 1.8766e-03, PNorm = 171.2566, GNorm = 0.1576, lr_0 = 1.8617e-04
Loss = 1.6842e-03, PNorm = 171.2598, GNorm = 0.2183, lr_0 = 1.8604e-04
Loss = 1.8622e-03, PNorm = 171.2646, GNorm = 0.1503, lr_0 = 1.8592e-04
Loss = 1.7655e-03, PNorm = 171.2690, GNorm = 0.0767, lr_0 = 1.8579e-04
Loss = 1.7772e-03, PNorm = 171.2725, GNorm = 0.0805, lr_0 = 1.8566e-04
Loss = 2.4133e-03, PNorm = 171.2774, GNorm = 0.4691, lr_0 = 1.8553e-04
Loss = 2.4604e-03, PNorm = 171.2796, GNorm = 0.2658, lr_0 = 1.8541e-04
Loss = 2.4350e-03, PNorm = 171.2836, GNorm = 0.2332, lr_0 = 1.8528e-04
Loss = 1.6488e-03, PNorm = 171.2859, GNorm = 0.0379, lr_0 = 1.8515e-04
Loss = 2.3851e-03, PNorm = 171.2884, GNorm = 0.1204, lr_0 = 1.8503e-04
Loss = 1.7506e-03, PNorm = 171.2917, GNorm = 0.0925, lr_0 = 1.8490e-04
Loss = 2.6703e-03, PNorm = 171.2959, GNorm = 0.0781, lr_0 = 1.8477e-04
Loss = 1.8923e-03, PNorm = 171.2983, GNorm = 0.1002, lr_0 = 1.8465e-04
Loss = 2.8228e-03, PNorm = 171.3024, GNorm = 0.1558, lr_0 = 1.8452e-04
Loss = 3.7397e-03, PNorm = 171.3069, GNorm = 0.1724, lr_0 = 1.8439e-04
Loss = 2.9479e-03, PNorm = 171.3114, GNorm = 0.1530, lr_0 = 1.8427e-04
Loss = 1.8257e-03, PNorm = 171.3160, GNorm = 0.1428, lr_0 = 1.8414e-04
Loss = 2.5239e-03, PNorm = 171.3191, GNorm = 0.1246, lr_0 = 1.8401e-04
Loss = 2.2034e-03, PNorm = 171.3234, GNorm = 0.1438, lr_0 = 1.8389e-04
Loss = 1.4293e-03, PNorm = 171.3271, GNorm = 0.1003, lr_0 = 1.8376e-04
Loss = 1.5287e-03, PNorm = 171.3315, GNorm = 0.1781, lr_0 = 1.8364e-04
Loss = 1.3778e-03, PNorm = 171.3369, GNorm = 0.2419, lr_0 = 1.8351e-04
Loss = 1.9744e-03, PNorm = 171.3417, GNorm = 0.1572, lr_0 = 1.8338e-04
Loss = 3.6662e-03, PNorm = 171.3450, GNorm = 0.1808, lr_0 = 1.8326e-04
Loss = 2.5410e-03, PNorm = 171.3479, GNorm = 0.1283, lr_0 = 1.8313e-04
Loss = 4.8541e-03, PNorm = 171.3508, GNorm = 0.3387, lr_0 = 1.8301e-04
Loss = 2.6403e-03, PNorm = 171.3530, GNorm = 0.1611, lr_0 = 1.8288e-04
Loss = 1.7504e-03, PNorm = 171.3556, GNorm = 0.0814, lr_0 = 1.8276e-04
Loss = 2.2355e-03, PNorm = 171.3564, GNorm = 0.1838, lr_0 = 1.8263e-04
Loss = 1.7659e-03, PNorm = 171.3602, GNorm = 0.1647, lr_0 = 1.8251e-04
Loss = 2.0523e-03, PNorm = 171.3659, GNorm = 0.0936, lr_0 = 1.8238e-04
Loss = 2.0254e-03, PNorm = 171.3723, GNorm = 0.0740, lr_0 = 1.8226e-04
Loss = 1.6405e-03, PNorm = 171.3751, GNorm = 0.0434, lr_0 = 1.8213e-04
Loss = 3.0861e-03, PNorm = 171.3762, GNorm = 0.0845, lr_0 = 1.8201e-04
Loss = 2.3577e-03, PNorm = 171.3786, GNorm = 0.0662, lr_0 = 1.8188e-04
Loss = 2.3619e-03, PNorm = 171.3838, GNorm = 0.1399, lr_0 = 1.8176e-04
Loss = 5.0741e-03, PNorm = 171.3892, GNorm = 0.0833, lr_0 = 1.8163e-04
Loss = 2.1681e-03, PNorm = 171.3928, GNorm = 0.1167, lr_0 = 1.8151e-04
Loss = 2.2369e-03, PNorm = 171.3958, GNorm = 0.0522, lr_0 = 1.8138e-04
Loss = 2.5229e-03, PNorm = 171.3990, GNorm = 0.1434, lr_0 = 1.8126e-04
Loss = 2.4036e-03, PNorm = 171.4013, GNorm = 0.0730, lr_0 = 1.8114e-04
Loss = 2.7904e-03, PNorm = 171.4051, GNorm = 0.2140, lr_0 = 1.8101e-04
Loss = 1.8527e-03, PNorm = 171.4086, GNorm = 0.2484, lr_0 = 1.8089e-04
Loss = 1.7467e-03, PNorm = 171.4129, GNorm = 0.1254, lr_0 = 1.8076e-04
Loss = 1.8871e-03, PNorm = 171.4179, GNorm = 0.0515, lr_0 = 1.8064e-04
Loss = 1.5026e-03, PNorm = 171.4202, GNorm = 0.2037, lr_0 = 1.8052e-04
Loss = 1.4755e-03, PNorm = 171.4234, GNorm = 0.0814, lr_0 = 1.8039e-04
Loss = 5.3221e-03, PNorm = 171.4277, GNorm = 0.1436, lr_0 = 1.8027e-04
Loss = 2.1599e-03, PNorm = 171.4323, GNorm = 0.1176, lr_0 = 1.8015e-04
Loss = 2.3714e-03, PNorm = 171.4375, GNorm = 0.0915, lr_0 = 1.8002e-04
Loss = 2.4072e-03, PNorm = 171.4411, GNorm = 0.2295, lr_0 = 1.7990e-04
Loss = 3.1171e-03, PNorm = 171.4447, GNorm = 0.0333, lr_0 = 1.7978e-04
Loss = 1.5336e-03, PNorm = 171.4496, GNorm = 0.0835, lr_0 = 1.7965e-04
Loss = 2.1458e-03, PNorm = 171.4550, GNorm = 0.0634, lr_0 = 1.7953e-04
Loss = 3.6685e-03, PNorm = 171.4605, GNorm = 0.2282, lr_0 = 1.7941e-04
Loss = 2.0224e-03, PNorm = 171.4625, GNorm = 0.1846, lr_0 = 1.7928e-04
Loss = 4.4825e-03, PNorm = 171.4644, GNorm = 0.7409, lr_0 = 1.7916e-04
Loss = 2.2547e-03, PNorm = 171.4653, GNorm = 0.1571, lr_0 = 1.7904e-04
Loss = 3.1055e-03, PNorm = 171.4667, GNorm = 0.1447, lr_0 = 1.7892e-04
Loss = 2.9120e-03, PNorm = 171.4712, GNorm = 0.1539, lr_0 = 1.7879e-04
Loss = 1.7004e-03, PNorm = 171.4791, GNorm = 0.1234, lr_0 = 1.7867e-04
Loss = 1.9182e-03, PNorm = 171.4860, GNorm = 0.1918, lr_0 = 1.7855e-04
Loss = 2.9825e-03, PNorm = 171.4920, GNorm = 0.1470, lr_0 = 1.7843e-04
Loss = 1.9219e-03, PNorm = 171.4970, GNorm = 0.1645, lr_0 = 1.7830e-04
Loss = 3.5161e-03, PNorm = 171.5026, GNorm = 0.1062, lr_0 = 1.7818e-04
Loss = 1.8409e-03, PNorm = 171.5077, GNorm = 0.1192, lr_0 = 1.7806e-04
Loss = 2.0234e-03, PNorm = 171.5123, GNorm = 0.1340, lr_0 = 1.7794e-04
Loss = 1.9663e-03, PNorm = 171.5142, GNorm = 0.1836, lr_0 = 1.7782e-04
Validation mae = 0.278282
Epoch 23
Loss = 1.4637e-03, PNorm = 171.5167, GNorm = 0.1101, lr_0 = 1.7769e-04
Loss = 2.2034e-03, PNorm = 171.5203, GNorm = 0.1571, lr_0 = 1.7757e-04
Loss = 1.4159e-03, PNorm = 171.5224, GNorm = 0.1023, lr_0 = 1.7745e-04
Loss = 2.1384e-03, PNorm = 171.5255, GNorm = 0.1027, lr_0 = 1.7733e-04
Loss = 2.8973e-03, PNorm = 171.5270, GNorm = 0.1249, lr_0 = 1.7721e-04
Loss = 1.5719e-03, PNorm = 171.5289, GNorm = 0.1489, lr_0 = 1.7709e-04
Loss = 1.4799e-03, PNorm = 171.5303, GNorm = 0.1173, lr_0 = 1.7696e-04
Loss = 1.5010e-03, PNorm = 171.5318, GNorm = 0.0557, lr_0 = 1.7684e-04
Loss = 1.2640e-03, PNorm = 171.5335, GNorm = 0.1455, lr_0 = 1.7672e-04
Loss = 1.5055e-03, PNorm = 171.5347, GNorm = 0.0716, lr_0 = 1.7660e-04
Loss = 1.6794e-03, PNorm = 171.5358, GNorm = 0.0941, lr_0 = 1.7648e-04
Loss = 1.8870e-03, PNorm = 171.5389, GNorm = 0.0588, lr_0 = 1.7636e-04
Loss = 1.7208e-03, PNorm = 171.5432, GNorm = 0.0536, lr_0 = 1.7624e-04
Loss = 4.2456e-03, PNorm = 171.5465, GNorm = 0.1070, lr_0 = 1.7612e-04
Loss = 1.6424e-03, PNorm = 171.5497, GNorm = 0.0821, lr_0 = 1.7600e-04
Loss = 1.7435e-03, PNorm = 171.5525, GNorm = 0.1369, lr_0 = 1.7588e-04
Loss = 1.4523e-03, PNorm = 171.5565, GNorm = 0.0475, lr_0 = 1.7576e-04
Loss = 1.3622e-03, PNorm = 171.5594, GNorm = 0.0856, lr_0 = 1.7564e-04
Loss = 2.2815e-03, PNorm = 171.5605, GNorm = 0.1272, lr_0 = 1.7552e-04
Loss = 1.5469e-03, PNorm = 171.5617, GNorm = 0.0508, lr_0 = 1.7540e-04
Loss = 1.9997e-03, PNorm = 171.5635, GNorm = 0.2282, lr_0 = 1.7528e-04
Loss = 1.9799e-03, PNorm = 171.5659, GNorm = 0.1684, lr_0 = 1.7516e-04
Loss = 3.4391e-03, PNorm = 171.5688, GNorm = 0.2588, lr_0 = 1.7504e-04
Loss = 2.0891e-03, PNorm = 171.5738, GNorm = 0.2706, lr_0 = 1.7492e-04
Loss = 1.6549e-03, PNorm = 171.5772, GNorm = 0.0484, lr_0 = 1.7480e-04
Loss = 1.4458e-03, PNorm = 171.5808, GNorm = 0.1808, lr_0 = 1.7468e-04
Loss = 1.9361e-03, PNorm = 171.5834, GNorm = 0.0801, lr_0 = 1.7456e-04
Loss = 3.7747e-03, PNorm = 171.5849, GNorm = 0.1389, lr_0 = 1.7444e-04
Loss = 3.1557e-03, PNorm = 171.5860, GNorm = 0.2393, lr_0 = 1.7432e-04
Loss = 1.5552e-03, PNorm = 171.5871, GNorm = 0.1758, lr_0 = 1.7420e-04
Loss = 2.4489e-03, PNorm = 171.5915, GNorm = 0.0610, lr_0 = 1.7408e-04
Loss = 1.6936e-03, PNorm = 171.5970, GNorm = 0.1524, lr_0 = 1.7396e-04
Loss = 4.3493e-03, PNorm = 171.6026, GNorm = 0.1211, lr_0 = 1.7384e-04
Loss = 1.4308e-03, PNorm = 171.6067, GNorm = 0.0450, lr_0 = 1.7372e-04
Loss = 1.8584e-03, PNorm = 171.6102, GNorm = 0.0684, lr_0 = 1.7360e-04
Loss = 2.2958e-03, PNorm = 171.6146, GNorm = 0.3382, lr_0 = 1.7348e-04
Loss = 1.4641e-03, PNorm = 171.6183, GNorm = 0.2113, lr_0 = 1.7336e-04
Loss = 2.5528e-03, PNorm = 171.6214, GNorm = 0.1621, lr_0 = 1.7325e-04
Loss = 1.3956e-03, PNorm = 171.6265, GNorm = 0.1782, lr_0 = 1.7313e-04
Loss = 1.6983e-03, PNorm = 171.6288, GNorm = 0.2505, lr_0 = 1.7301e-04
Loss = 2.4863e-03, PNorm = 171.6316, GNorm = 0.1456, lr_0 = 1.7289e-04
Loss = 2.3383e-03, PNorm = 171.6357, GNorm = 0.1953, lr_0 = 1.7277e-04
Loss = 1.8356e-03, PNorm = 171.6381, GNorm = 0.0989, lr_0 = 1.7265e-04
Loss = 1.8049e-03, PNorm = 171.6399, GNorm = 0.1932, lr_0 = 1.7253e-04
Loss = 2.9908e-03, PNorm = 171.6430, GNorm = 0.1061, lr_0 = 1.7242e-04
Loss = 1.9219e-03, PNorm = 171.6468, GNorm = 0.2063, lr_0 = 1.7230e-04
Loss = 2.4011e-03, PNorm = 171.6522, GNorm = 0.1681, lr_0 = 1.7218e-04
Loss = 1.7616e-03, PNorm = 171.6556, GNorm = 0.1126, lr_0 = 1.7206e-04
Loss = 2.1210e-03, PNorm = 171.6574, GNorm = 0.0762, lr_0 = 1.7194e-04
Loss = 1.6584e-03, PNorm = 171.6575, GNorm = 0.0805, lr_0 = 1.7183e-04
Loss = 1.5719e-03, PNorm = 171.6596, GNorm = 0.0765, lr_0 = 1.7171e-04
Loss = 1.3252e-03, PNorm = 171.6630, GNorm = 0.1017, lr_0 = 1.7159e-04
Loss = 1.7076e-03, PNorm = 171.6672, GNorm = 0.2729, lr_0 = 1.7147e-04
Loss = 1.4209e-03, PNorm = 171.6699, GNorm = 0.1340, lr_0 = 1.7136e-04
Loss = 3.0351e-03, PNorm = 171.6728, GNorm = 0.1528, lr_0 = 1.7124e-04
Loss = 1.8479e-03, PNorm = 171.6754, GNorm = 0.1117, lr_0 = 1.7112e-04
Loss = 2.1480e-03, PNorm = 171.6780, GNorm = 0.4106, lr_0 = 1.7100e-04
Loss = 4.2116e-03, PNorm = 171.6804, GNorm = 0.2897, lr_0 = 1.7089e-04
Loss = 2.0452e-03, PNorm = 171.6828, GNorm = 0.0708, lr_0 = 1.7077e-04
Loss = 1.9878e-03, PNorm = 171.6867, GNorm = 0.1713, lr_0 = 1.7065e-04
Loss = 1.5309e-03, PNorm = 171.6892, GNorm = 0.0798, lr_0 = 1.7054e-04
Loss = 2.4399e-03, PNorm = 171.6923, GNorm = 0.0957, lr_0 = 1.7042e-04
Loss = 1.5709e-03, PNorm = 171.6947, GNorm = 0.2703, lr_0 = 1.7030e-04
Loss = 2.1104e-03, PNorm = 171.6989, GNorm = 0.1638, lr_0 = 1.7019e-04
Loss = 1.6910e-03, PNorm = 171.7013, GNorm = 0.1145, lr_0 = 1.7007e-04
Loss = 2.5588e-03, PNorm = 171.7047, GNorm = 0.1837, lr_0 = 1.6995e-04
Loss = 2.2253e-03, PNorm = 171.7079, GNorm = 0.1198, lr_0 = 1.6984e-04
Loss = 5.0978e-03, PNorm = 171.7119, GNorm = 0.4204, lr_0 = 1.6972e-04
Loss = 3.1049e-03, PNorm = 171.7145, GNorm = 0.2469, lr_0 = 1.6960e-04
Loss = 1.2477e-03, PNorm = 171.7172, GNorm = 0.1051, lr_0 = 1.6949e-04
Loss = 1.5097e-03, PNorm = 171.7190, GNorm = 0.0879, lr_0 = 1.6937e-04
Loss = 1.9905e-03, PNorm = 171.7216, GNorm = 0.1283, lr_0 = 1.6926e-04
Loss = 1.2848e-03, PNorm = 171.7245, GNorm = 0.0520, lr_0 = 1.6914e-04
Loss = 2.6004e-03, PNorm = 171.7282, GNorm = 0.0720, lr_0 = 1.6902e-04
Loss = 2.1067e-03, PNorm = 171.7332, GNorm = 0.1372, lr_0 = 1.6891e-04
Loss = 2.9610e-03, PNorm = 171.7386, GNorm = 0.0824, lr_0 = 1.6879e-04
Loss = 2.3636e-03, PNorm = 171.7434, GNorm = 0.0392, lr_0 = 1.6868e-04
Loss = 4.3367e-03, PNorm = 171.7473, GNorm = 0.5632, lr_0 = 1.6856e-04
Loss = 1.7352e-03, PNorm = 171.7502, GNorm = 0.2560, lr_0 = 1.6845e-04
Loss = 2.4556e-03, PNorm = 171.7548, GNorm = 0.0890, lr_0 = 1.6833e-04
Loss = 1.5740e-03, PNorm = 171.7573, GNorm = 0.1644, lr_0 = 1.6821e-04
Loss = 1.6961e-03, PNorm = 171.7599, GNorm = 0.4148, lr_0 = 1.6810e-04
Loss = 2.7497e-03, PNorm = 171.7632, GNorm = 0.0837, lr_0 = 1.6798e-04
Loss = 1.2554e-03, PNorm = 171.7667, GNorm = 0.1053, lr_0 = 1.6787e-04
Loss = 1.4535e-03, PNorm = 171.7703, GNorm = 0.0855, lr_0 = 1.6775e-04
Loss = 2.0218e-03, PNorm = 171.7724, GNorm = 0.0333, lr_0 = 1.6764e-04
Loss = 1.5566e-03, PNorm = 171.7738, GNorm = 0.0696, lr_0 = 1.6752e-04
Loss = 1.8130e-03, PNorm = 171.7782, GNorm = 0.2605, lr_0 = 1.6741e-04
Loss = 2.8375e-03, PNorm = 171.7821, GNorm = 0.3808, lr_0 = 1.6729e-04
Loss = 3.7685e-03, PNorm = 171.7858, GNorm = 0.1615, lr_0 = 1.6718e-04
Loss = 1.8493e-03, PNorm = 171.7888, GNorm = 0.0593, lr_0 = 1.6707e-04
Loss = 1.3308e-03, PNorm = 171.7921, GNorm = 0.1698, lr_0 = 1.6695e-04
Loss = 1.8903e-03, PNorm = 171.7952, GNorm = 0.0466, lr_0 = 1.6684e-04
Loss = 1.2887e-03, PNorm = 171.7995, GNorm = 0.1093, lr_0 = 1.6672e-04
Loss = 1.7814e-03, PNorm = 171.8034, GNorm = 0.1287, lr_0 = 1.6661e-04
Loss = 1.9980e-03, PNorm = 171.8074, GNorm = 0.0453, lr_0 = 1.6649e-04
Loss = 2.4884e-03, PNorm = 171.8101, GNorm = 0.1533, lr_0 = 1.6638e-04
Loss = 1.3668e-03, PNorm = 171.8127, GNorm = 0.1054, lr_0 = 1.6627e-04
Loss = 1.5294e-03, PNorm = 171.8141, GNorm = 0.1002, lr_0 = 1.6615e-04
Loss = 2.5798e-03, PNorm = 171.8169, GNorm = 0.0836, lr_0 = 1.6604e-04
Loss = 1.5811e-03, PNorm = 171.8196, GNorm = 0.1174, lr_0 = 1.6592e-04
Loss = 1.3570e-03, PNorm = 171.8224, GNorm = 0.0852, lr_0 = 1.6581e-04
Loss = 1.5271e-03, PNorm = 171.8264, GNorm = 0.1275, lr_0 = 1.6570e-04
Loss = 2.0536e-03, PNorm = 171.8292, GNorm = 0.1163, lr_0 = 1.6558e-04
Loss = 2.7695e-03, PNorm = 171.8305, GNorm = 0.1073, lr_0 = 1.6547e-04
Loss = 3.2674e-03, PNorm = 171.8321, GNorm = 0.2342, lr_0 = 1.6536e-04
Loss = 2.8186e-03, PNorm = 171.8348, GNorm = 0.1389, lr_0 = 1.6524e-04
Loss = 3.9851e-03, PNorm = 171.8380, GNorm = 0.6481, lr_0 = 1.6513e-04
Loss = 1.8913e-03, PNorm = 171.8423, GNorm = 0.0727, lr_0 = 1.6502e-04
Loss = 1.6746e-03, PNorm = 171.8464, GNorm = 0.1275, lr_0 = 1.6490e-04
Loss = 2.2505e-03, PNorm = 171.8508, GNorm = 0.1136, lr_0 = 1.6479e-04
Loss = 2.0569e-03, PNorm = 171.8552, GNorm = 0.0779, lr_0 = 1.6468e-04
Loss = 1.4862e-03, PNorm = 171.8568, GNorm = 0.0542, lr_0 = 1.6457e-04
Loss = 1.2234e-03, PNorm = 171.8612, GNorm = 0.1235, lr_0 = 1.6445e-04
Loss = 2.2250e-03, PNorm = 171.8648, GNorm = 0.4893, lr_0 = 1.6434e-04
Loss = 4.2014e-03, PNorm = 171.8675, GNorm = 0.2128, lr_0 = 1.6423e-04
Loss = 5.0953e-03, PNorm = 171.8713, GNorm = 0.1399, lr_0 = 1.6412e-04
Loss = 2.4586e-03, PNorm = 171.8757, GNorm = 0.1642, lr_0 = 1.6400e-04
Loss = 1.2723e-03, PNorm = 171.8813, GNorm = 0.1589, lr_0 = 1.6389e-04
Loss = 1.5389e-03, PNorm = 171.8837, GNorm = 0.1999, lr_0 = 1.6378e-04
Validation mae = 0.278105
Epoch 24
Loss = 1.9845e-03, PNorm = 171.8868, GNorm = 0.1400, lr_0 = 1.6367e-04
Loss = 1.2283e-03, PNorm = 171.8872, GNorm = 0.1327, lr_0 = 1.6355e-04
Loss = 2.1255e-03, PNorm = 171.8897, GNorm = 0.0475, lr_0 = 1.6344e-04
Loss = 1.7705e-03, PNorm = 171.8913, GNorm = 0.1314, lr_0 = 1.6333e-04
Loss = 2.5716e-03, PNorm = 171.8939, GNorm = 0.1759, lr_0 = 1.6322e-04
Loss = 1.5678e-03, PNorm = 171.8972, GNorm = 0.4376, lr_0 = 1.6311e-04
Loss = 2.5227e-03, PNorm = 171.8990, GNorm = 0.0887, lr_0 = 1.6299e-04
Loss = 1.3124e-03, PNorm = 171.9026, GNorm = 0.1384, lr_0 = 1.6288e-04
Loss = 2.2634e-03, PNorm = 171.9054, GNorm = 0.2839, lr_0 = 1.6277e-04
Loss = 3.2432e-03, PNorm = 171.9083, GNorm = 0.0395, lr_0 = 1.6266e-04
Loss = 2.7941e-03, PNorm = 171.9100, GNorm = 0.1069, lr_0 = 1.6255e-04
Loss = 1.0912e-03, PNorm = 171.9117, GNorm = 0.0801, lr_0 = 1.6244e-04
Loss = 1.9054e-03, PNorm = 171.9145, GNorm = 0.1039, lr_0 = 1.6233e-04
Loss = 1.3632e-03, PNorm = 171.9184, GNorm = 0.1503, lr_0 = 1.6221e-04
Loss = 1.1259e-03, PNorm = 171.9210, GNorm = 0.0900, lr_0 = 1.6210e-04
Loss = 2.3672e-03, PNorm = 171.9237, GNorm = 0.1221, lr_0 = 1.6199e-04
Loss = 1.9582e-03, PNorm = 171.9254, GNorm = 0.1342, lr_0 = 1.6188e-04
Loss = 1.2977e-03, PNorm = 171.9286, GNorm = 0.0664, lr_0 = 1.6177e-04
Loss = 1.1706e-03, PNorm = 171.9311, GNorm = 0.1201, lr_0 = 1.6166e-04
Loss = 1.1206e-03, PNorm = 171.9336, GNorm = 0.1324, lr_0 = 1.6155e-04
Loss = 1.5848e-03, PNorm = 171.9359, GNorm = 0.1496, lr_0 = 1.6144e-04
Loss = 2.1844e-03, PNorm = 171.9366, GNorm = 0.0485, lr_0 = 1.6133e-04
Loss = 1.2313e-03, PNorm = 171.9386, GNorm = 0.0427, lr_0 = 1.6122e-04
Loss = 1.3026e-03, PNorm = 171.9410, GNorm = 0.1662, lr_0 = 1.6111e-04
Loss = 1.3557e-03, PNorm = 171.9420, GNorm = 0.1103, lr_0 = 1.6100e-04
Loss = 1.6571e-03, PNorm = 171.9450, GNorm = 0.1312, lr_0 = 1.6089e-04
Loss = 1.0313e-03, PNorm = 171.9474, GNorm = 0.1273, lr_0 = 1.6078e-04
Loss = 1.7661e-03, PNorm = 171.9504, GNorm = 0.1979, lr_0 = 1.6067e-04
Loss = 1.4672e-03, PNorm = 171.9530, GNorm = 0.1373, lr_0 = 1.6056e-04
Loss = 1.4045e-03, PNorm = 171.9547, GNorm = 0.1701, lr_0 = 1.6045e-04
Loss = 2.6699e-03, PNorm = 171.9563, GNorm = 0.3972, lr_0 = 1.6034e-04
Loss = 1.1949e-03, PNorm = 171.9600, GNorm = 0.0845, lr_0 = 1.6023e-04
Loss = 1.0749e-03, PNorm = 171.9633, GNorm = 0.0917, lr_0 = 1.6012e-04
Loss = 3.0450e-03, PNorm = 171.9665, GNorm = 0.3116, lr_0 = 1.6001e-04
Loss = 1.8501e-03, PNorm = 171.9688, GNorm = 0.0617, lr_0 = 1.5990e-04
Loss = 1.6718e-03, PNorm = 171.9715, GNorm = 0.0400, lr_0 = 1.5979e-04
Loss = 3.2286e-03, PNorm = 171.9758, GNorm = 0.0401, lr_0 = 1.5968e-04
Loss = 4.2284e-03, PNorm = 171.9790, GNorm = 0.1866, lr_0 = 1.5957e-04
Loss = 1.3668e-03, PNorm = 171.9822, GNorm = 0.1370, lr_0 = 1.5946e-04
Loss = 1.8618e-03, PNorm = 171.9861, GNorm = 0.0631, lr_0 = 1.5935e-04
Loss = 1.0539e-03, PNorm = 171.9896, GNorm = 0.1153, lr_0 = 1.5924e-04
Loss = 1.1622e-03, PNorm = 171.9907, GNorm = 0.1056, lr_0 = 1.5913e-04
Loss = 1.6889e-03, PNorm = 171.9934, GNorm = 0.1608, lr_0 = 1.5902e-04
Loss = 1.3401e-03, PNorm = 171.9945, GNorm = 0.3148, lr_0 = 1.5891e-04
Loss = 3.3699e-03, PNorm = 171.9966, GNorm = 0.1091, lr_0 = 1.5880e-04
Loss = 1.3146e-03, PNorm = 172.0003, GNorm = 0.1148, lr_0 = 1.5870e-04
Loss = 1.6613e-03, PNorm = 172.0024, GNorm = 0.1349, lr_0 = 1.5859e-04
Loss = 1.8547e-03, PNorm = 172.0059, GNorm = 0.1443, lr_0 = 1.5848e-04
Loss = 1.3918e-03, PNorm = 172.0080, GNorm = 0.1014, lr_0 = 1.5837e-04
Loss = 2.2211e-03, PNorm = 172.0099, GNorm = 0.2444, lr_0 = 1.5826e-04
Loss = 1.9290e-03, PNorm = 172.0140, GNorm = 0.2107, lr_0 = 1.5815e-04
Loss = 1.3031e-03, PNorm = 172.0150, GNorm = 0.0447, lr_0 = 1.5804e-04
Loss = 1.6067e-03, PNorm = 172.0161, GNorm = 0.2993, lr_0 = 1.5794e-04
Loss = 2.7512e-03, PNorm = 172.0172, GNorm = 0.1720, lr_0 = 1.5783e-04
Loss = 3.4084e-03, PNorm = 172.0207, GNorm = 0.0851, lr_0 = 1.5772e-04
Loss = 1.6890e-03, PNorm = 172.0252, GNorm = 0.0636, lr_0 = 1.5761e-04
Loss = 2.6497e-03, PNorm = 172.0286, GNorm = 0.1378, lr_0 = 1.5750e-04
Loss = 2.8016e-03, PNorm = 172.0303, GNorm = 0.1715, lr_0 = 1.5740e-04
Loss = 1.4714e-03, PNorm = 172.0342, GNorm = 0.1168, lr_0 = 1.5729e-04
Loss = 2.7242e-03, PNorm = 172.0363, GNorm = 0.0690, lr_0 = 1.5718e-04
Loss = 1.6332e-03, PNorm = 172.0401, GNorm = 0.0608, lr_0 = 1.5707e-04
Loss = 1.4953e-03, PNorm = 172.0427, GNorm = 0.1546, lr_0 = 1.5697e-04
Loss = 1.4915e-03, PNorm = 172.0457, GNorm = 0.0924, lr_0 = 1.5686e-04
Loss = 1.2797e-03, PNorm = 172.0475, GNorm = 0.0573, lr_0 = 1.5675e-04
Loss = 2.4604e-03, PNorm = 172.0497, GNorm = 0.0478, lr_0 = 1.5664e-04
Loss = 2.9152e-03, PNorm = 172.0526, GNorm = 0.1512, lr_0 = 1.5654e-04
Loss = 1.2712e-03, PNorm = 172.0559, GNorm = 0.0925, lr_0 = 1.5643e-04
Loss = 2.0173e-03, PNorm = 172.0588, GNorm = 0.1361, lr_0 = 1.5632e-04
Loss = 1.4998e-03, PNorm = 172.0625, GNorm = 0.0600, lr_0 = 1.5621e-04
Loss = 3.3251e-03, PNorm = 172.0658, GNorm = 0.4094, lr_0 = 1.5611e-04
Loss = 2.7130e-03, PNorm = 172.0686, GNorm = 0.2200, lr_0 = 1.5600e-04
Loss = 1.0893e-03, PNorm = 172.0716, GNorm = 0.1508, lr_0 = 1.5589e-04
Loss = 1.5836e-03, PNorm = 172.0732, GNorm = 0.0520, lr_0 = 1.5579e-04
Loss = 1.7251e-03, PNorm = 172.0747, GNorm = 0.1490, lr_0 = 1.5568e-04
Loss = 1.6174e-03, PNorm = 172.0768, GNorm = 0.0847, lr_0 = 1.5557e-04
Loss = 1.9079e-03, PNorm = 172.0783, GNorm = 0.1446, lr_0 = 1.5547e-04
Loss = 1.5352e-03, PNorm = 172.0806, GNorm = 0.1394, lr_0 = 1.5536e-04
Loss = 1.0699e-03, PNorm = 172.0834, GNorm = 0.0713, lr_0 = 1.5525e-04
Loss = 3.1619e-03, PNorm = 172.0875, GNorm = 0.1501, lr_0 = 1.5515e-04
Loss = 2.3114e-03, PNorm = 172.0902, GNorm = 0.0855, lr_0 = 1.5504e-04
Loss = 1.3156e-03, PNorm = 172.0925, GNorm = 0.0675, lr_0 = 1.5493e-04
Loss = 1.5105e-03, PNorm = 172.0936, GNorm = 0.0870, lr_0 = 1.5483e-04
Loss = 1.1070e-03, PNorm = 172.0956, GNorm = 0.0684, lr_0 = 1.5472e-04
Loss = 1.2893e-03, PNorm = 172.0977, GNorm = 0.0871, lr_0 = 1.5462e-04
Loss = 2.0149e-03, PNorm = 172.1002, GNorm = 0.2089, lr_0 = 1.5451e-04
Loss = 1.2914e-03, PNorm = 172.1041, GNorm = 0.1816, lr_0 = 1.5440e-04
Loss = 1.2154e-03, PNorm = 172.1065, GNorm = 0.0398, lr_0 = 1.5430e-04
Loss = 1.4281e-03, PNorm = 172.1089, GNorm = 0.0710, lr_0 = 1.5419e-04
Loss = 1.1536e-03, PNorm = 172.1105, GNorm = 0.0647, lr_0 = 1.5409e-04
Loss = 2.3408e-03, PNorm = 172.1131, GNorm = 0.1780, lr_0 = 1.5398e-04
Loss = 2.2486e-03, PNorm = 172.1149, GNorm = 0.1261, lr_0 = 1.5388e-04
Loss = 1.6643e-03, PNorm = 172.1179, GNorm = 0.1658, lr_0 = 1.5377e-04
Loss = 2.6015e-03, PNorm = 172.1205, GNorm = 0.0955, lr_0 = 1.5367e-04
Loss = 1.5419e-03, PNorm = 172.1240, GNorm = 0.1596, lr_0 = 1.5356e-04
Loss = 1.3462e-03, PNorm = 172.1250, GNorm = 0.2342, lr_0 = 1.5346e-04
Loss = 2.2329e-03, PNorm = 172.1285, GNorm = 0.0994, lr_0 = 1.5335e-04
Loss = 1.4726e-03, PNorm = 172.1308, GNorm = 0.1309, lr_0 = 1.5325e-04
Loss = 1.4463e-03, PNorm = 172.1320, GNorm = 0.0976, lr_0 = 1.5314e-04
Loss = 3.1305e-03, PNorm = 172.1349, GNorm = 0.1204, lr_0 = 1.5304e-04
Loss = 1.4861e-03, PNorm = 172.1383, GNorm = 0.0490, lr_0 = 1.5293e-04
Loss = 2.2537e-03, PNorm = 172.1415, GNorm = 0.1036, lr_0 = 1.5283e-04
Loss = 5.2943e-03, PNorm = 172.1450, GNorm = 0.1376, lr_0 = 1.5272e-04
Loss = 1.3321e-03, PNorm = 172.1480, GNorm = 0.2016, lr_0 = 1.5262e-04
Loss = 2.0498e-03, PNorm = 172.1528, GNorm = 0.1912, lr_0 = 1.5251e-04
Loss = 1.4226e-03, PNorm = 172.1557, GNorm = 0.0672, lr_0 = 1.5241e-04
Loss = 1.7456e-03, PNorm = 172.1595, GNorm = 0.2570, lr_0 = 1.5230e-04
Loss = 2.1079e-03, PNorm = 172.1617, GNorm = 0.0912, lr_0 = 1.5220e-04
Loss = 1.2416e-03, PNorm = 172.1652, GNorm = 0.1160, lr_0 = 1.5209e-04
Loss = 3.1039e-03, PNorm = 172.1683, GNorm = 0.1456, lr_0 = 1.5199e-04
Loss = 2.8678e-03, PNorm = 172.1707, GNorm = 0.1426, lr_0 = 1.5189e-04
Loss = 1.6908e-03, PNorm = 172.1747, GNorm = 0.1554, lr_0 = 1.5178e-04
Loss = 1.5402e-03, PNorm = 172.1781, GNorm = 0.1429, lr_0 = 1.5168e-04
Loss = 3.5894e-03, PNorm = 172.1792, GNorm = 0.1120, lr_0 = 1.5157e-04
Loss = 2.3467e-03, PNorm = 172.1798, GNorm = 0.2105, lr_0 = 1.5147e-04
Loss = 1.2576e-03, PNorm = 172.1819, GNorm = 0.1087, lr_0 = 1.5137e-04
Loss = 3.6423e-03, PNorm = 172.1856, GNorm = 0.2254, lr_0 = 1.5126e-04
Loss = 1.8463e-03, PNorm = 172.1877, GNorm = 0.1380, lr_0 = 1.5116e-04
Loss = 1.5223e-03, PNorm = 172.1911, GNorm = 0.0363, lr_0 = 1.5106e-04
Loss = 1.4188e-03, PNorm = 172.1934, GNorm = 0.0421, lr_0 = 1.5095e-04
Loss = 1.5656e-03, PNorm = 172.1968, GNorm = 0.0775, lr_0 = 1.5085e-04
Validation mae = 0.278104
Epoch 25
Loss = 1.5087e-03, PNorm = 172.2000, GNorm = 0.1025, lr_0 = 1.5075e-04
Loss = 1.4956e-03, PNorm = 172.2018, GNorm = 0.1766, lr_0 = 1.5064e-04
Loss = 1.5927e-03, PNorm = 172.2051, GNorm = 0.0284, lr_0 = 1.5054e-04
Loss = 1.2914e-03, PNorm = 172.2068, GNorm = 0.0583, lr_0 = 1.5044e-04
Loss = 1.7360e-03, PNorm = 172.2082, GNorm = 0.1270, lr_0 = 1.5033e-04
Loss = 1.6391e-03, PNorm = 172.2110, GNorm = 0.0470, lr_0 = 1.5023e-04
Loss = 1.6325e-03, PNorm = 172.2139, GNorm = 0.2247, lr_0 = 1.5013e-04
Loss = 1.2345e-03, PNorm = 172.2165, GNorm = 0.0451, lr_0 = 1.5002e-04
Loss = 1.6245e-03, PNorm = 172.2182, GNorm = 0.0914, lr_0 = 1.4992e-04
Loss = 1.8162e-03, PNorm = 172.2190, GNorm = 0.1165, lr_0 = 1.4982e-04
Loss = 1.3667e-03, PNorm = 172.2205, GNorm = 0.1126, lr_0 = 1.4972e-04
Loss = 1.0047e-03, PNorm = 172.2216, GNorm = 0.1096, lr_0 = 1.4961e-04
Loss = 1.3310e-03, PNorm = 172.2236, GNorm = 0.1408, lr_0 = 1.4951e-04
Loss = 1.0592e-03, PNorm = 172.2273, GNorm = 0.2426, lr_0 = 1.4941e-04
Loss = 2.1619e-03, PNorm = 172.2294, GNorm = 0.1719, lr_0 = 1.4931e-04
Loss = 2.0342e-03, PNorm = 172.2324, GNorm = 0.1011, lr_0 = 1.4920e-04
Loss = 1.8659e-03, PNorm = 172.2364, GNorm = 0.0857, lr_0 = 1.4910e-04
Loss = 3.3114e-03, PNorm = 172.2390, GNorm = 0.0857, lr_0 = 1.4900e-04
Loss = 1.2592e-03, PNorm = 172.2405, GNorm = 0.1213, lr_0 = 1.4890e-04
Loss = 1.6998e-03, PNorm = 172.2448, GNorm = 0.0926, lr_0 = 1.4880e-04
Loss = 1.7268e-03, PNorm = 172.2463, GNorm = 0.0754, lr_0 = 1.4869e-04
Loss = 2.4667e-03, PNorm = 172.2473, GNorm = 0.0777, lr_0 = 1.4859e-04
Loss = 1.1619e-03, PNorm = 172.2480, GNorm = 0.1925, lr_0 = 1.4849e-04
Loss = 1.1294e-03, PNorm = 172.2504, GNorm = 0.0474, lr_0 = 1.4839e-04
Loss = 2.9657e-03, PNorm = 172.2530, GNorm = 0.0383, lr_0 = 1.4829e-04
Loss = 1.4283e-03, PNorm = 172.2565, GNorm = 0.2408, lr_0 = 1.4818e-04
Loss = 1.0265e-03, PNorm = 172.2584, GNorm = 0.0956, lr_0 = 1.4808e-04
Loss = 1.3831e-03, PNorm = 172.2600, GNorm = 0.0425, lr_0 = 1.4798e-04
Loss = 1.1242e-03, PNorm = 172.2624, GNorm = 0.1320, lr_0 = 1.4788e-04
Loss = 9.7460e-04, PNorm = 172.2641, GNorm = 0.1800, lr_0 = 1.4778e-04
Loss = 2.5079e-03, PNorm = 172.2672, GNorm = 0.0919, lr_0 = 1.4768e-04
Loss = 2.6349e-03, PNorm = 172.2687, GNorm = 0.1817, lr_0 = 1.4758e-04
Loss = 1.4478e-03, PNorm = 172.2711, GNorm = 0.1426, lr_0 = 1.4748e-04
Loss = 2.3801e-03, PNorm = 172.2724, GNorm = 0.0758, lr_0 = 1.4737e-04
Loss = 3.3024e-03, PNorm = 172.2720, GNorm = 0.0439, lr_0 = 1.4727e-04
Loss = 2.0716e-03, PNorm = 172.2740, GNorm = 0.1387, lr_0 = 1.4717e-04
Loss = 3.4464e-03, PNorm = 172.2776, GNorm = 0.1167, lr_0 = 1.4707e-04
Loss = 1.3398e-03, PNorm = 172.2809, GNorm = 0.1143, lr_0 = 1.4697e-04
Loss = 2.2497e-03, PNorm = 172.2834, GNorm = 0.0739, lr_0 = 1.4687e-04
Loss = 3.1764e-03, PNorm = 172.2853, GNorm = 0.0906, lr_0 = 1.4677e-04
Loss = 3.0894e-03, PNorm = 172.2878, GNorm = 0.0620, lr_0 = 1.4667e-04
Loss = 1.2809e-03, PNorm = 172.2903, GNorm = 0.0799, lr_0 = 1.4657e-04
Loss = 1.0432e-03, PNorm = 172.2916, GNorm = 0.1822, lr_0 = 1.4647e-04
Loss = 1.3088e-03, PNorm = 172.2932, GNorm = 0.0780, lr_0 = 1.4637e-04
Loss = 1.3167e-03, PNorm = 172.2950, GNorm = 0.0993, lr_0 = 1.4627e-04
Loss = 1.1763e-03, PNorm = 172.2966, GNorm = 0.1832, lr_0 = 1.4617e-04
Loss = 1.0371e-03, PNorm = 172.2987, GNorm = 0.0566, lr_0 = 1.4607e-04
Loss = 8.8680e-04, PNorm = 172.3010, GNorm = 0.0691, lr_0 = 1.4597e-04
Loss = 9.8957e-04, PNorm = 172.3038, GNorm = 0.0927, lr_0 = 1.4587e-04
Loss = 8.3528e-04, PNorm = 172.3064, GNorm = 0.1667, lr_0 = 1.4577e-04
Loss = 1.8804e-03, PNorm = 172.3100, GNorm = 0.0711, lr_0 = 1.4567e-04
Loss = 2.5239e-03, PNorm = 172.3134, GNorm = 0.1006, lr_0 = 1.4557e-04
Loss = 1.7326e-03, PNorm = 172.3149, GNorm = 0.1080, lr_0 = 1.4547e-04
Loss = 1.1331e-03, PNorm = 172.3165, GNorm = 0.1302, lr_0 = 1.4537e-04
Loss = 1.6041e-03, PNorm = 172.3185, GNorm = 0.1201, lr_0 = 1.4527e-04
Loss = 1.8954e-03, PNorm = 172.3188, GNorm = 0.0967, lr_0 = 1.4517e-04
Loss = 3.6036e-03, PNorm = 172.3190, GNorm = 0.1669, lr_0 = 1.4507e-04
Loss = 1.3916e-03, PNorm = 172.3205, GNorm = 0.1251, lr_0 = 1.4497e-04
Loss = 1.6515e-03, PNorm = 172.3225, GNorm = 0.0494, lr_0 = 1.4487e-04
Loss = 1.0078e-03, PNorm = 172.3245, GNorm = 0.0843, lr_0 = 1.4477e-04
Loss = 9.5649e-04, PNorm = 172.3274, GNorm = 0.0421, lr_0 = 1.4467e-04
Loss = 1.1396e-03, PNorm = 172.3293, GNorm = 0.0603, lr_0 = 1.4457e-04
Loss = 1.4495e-03, PNorm = 172.3321, GNorm = 0.1519, lr_0 = 1.4447e-04
Loss = 1.4606e-03, PNorm = 172.3330, GNorm = 0.0290, lr_0 = 1.4438e-04
Loss = 8.0481e-04, PNorm = 172.3349, GNorm = 0.0926, lr_0 = 1.4428e-04
Loss = 1.6935e-03, PNorm = 172.3367, GNorm = 0.1105, lr_0 = 1.4418e-04
Loss = 9.8413e-04, PNorm = 172.3374, GNorm = 0.0429, lr_0 = 1.4408e-04
Loss = 1.3465e-03, PNorm = 172.3391, GNorm = 0.1158, lr_0 = 1.4398e-04
Loss = 9.2059e-04, PNorm = 172.3410, GNorm = 0.1353, lr_0 = 1.4388e-04
Loss = 9.0729e-04, PNorm = 172.3438, GNorm = 0.0648, lr_0 = 1.4378e-04
Loss = 1.8673e-03, PNorm = 172.3478, GNorm = 0.1872, lr_0 = 1.4368e-04
Loss = 1.5863e-03, PNorm = 172.3505, GNorm = 0.1153, lr_0 = 1.4359e-04
Loss = 2.0963e-03, PNorm = 172.3529, GNorm = 0.1527, lr_0 = 1.4349e-04
Loss = 1.1741e-03, PNorm = 172.3550, GNorm = 0.2366, lr_0 = 1.4339e-04
Loss = 1.4833e-03, PNorm = 172.3570, GNorm = 0.1660, lr_0 = 1.4329e-04
Loss = 2.3163e-03, PNorm = 172.3582, GNorm = 0.0466, lr_0 = 1.4319e-04
Loss = 1.2348e-03, PNorm = 172.3606, GNorm = 0.1796, lr_0 = 1.4310e-04
Loss = 9.6831e-04, PNorm = 172.3637, GNorm = 0.1186, lr_0 = 1.4300e-04
Loss = 2.1369e-03, PNorm = 172.3666, GNorm = 0.0803, lr_0 = 1.4290e-04
Loss = 1.0606e-03, PNorm = 172.3688, GNorm = 0.1288, lr_0 = 1.4280e-04
Loss = 1.1660e-03, PNorm = 172.3721, GNorm = 0.0726, lr_0 = 1.4270e-04
Loss = 2.0999e-03, PNorm = 172.3754, GNorm = 0.1993, lr_0 = 1.4261e-04
Loss = 1.6515e-03, PNorm = 172.3776, GNorm = 0.0468, lr_0 = 1.4251e-04
Loss = 1.3115e-03, PNorm = 172.3801, GNorm = 0.1416, lr_0 = 1.4241e-04
Loss = 9.0805e-04, PNorm = 172.3821, GNorm = 0.0525, lr_0 = 1.4231e-04
Loss = 1.4172e-03, PNorm = 172.3843, GNorm = 0.1103, lr_0 = 1.4222e-04
Loss = 1.4746e-03, PNorm = 172.3852, GNorm = 0.1366, lr_0 = 1.4212e-04
Loss = 2.1065e-03, PNorm = 172.3877, GNorm = 0.2829, lr_0 = 1.4202e-04
Loss = 4.2249e-03, PNorm = 172.3895, GNorm = 0.1555, lr_0 = 1.4192e-04
Loss = 1.3086e-03, PNorm = 172.3934, GNorm = 0.1733, lr_0 = 1.4183e-04
Loss = 1.5389e-03, PNorm = 172.3949, GNorm = 0.1646, lr_0 = 1.4173e-04
Loss = 1.0505e-03, PNorm = 172.3976, GNorm = 0.1663, lr_0 = 1.4163e-04
Loss = 1.3101e-03, PNorm = 172.4000, GNorm = 0.0864, lr_0 = 1.4153e-04
Loss = 2.3881e-03, PNorm = 172.4023, GNorm = 0.0341, lr_0 = 1.4144e-04
Loss = 1.5278e-03, PNorm = 172.4035, GNorm = 0.1542, lr_0 = 1.4134e-04
Loss = 2.9039e-03, PNorm = 172.4050, GNorm = 0.3307, lr_0 = 1.4124e-04
Loss = 2.1608e-03, PNorm = 172.4073, GNorm = 0.0917, lr_0 = 1.4115e-04
Loss = 1.5014e-03, PNorm = 172.4113, GNorm = 0.1411, lr_0 = 1.4105e-04
Loss = 1.6594e-03, PNorm = 172.4143, GNorm = 0.0848, lr_0 = 1.4095e-04
Loss = 1.0388e-03, PNorm = 172.4164, GNorm = 0.1277, lr_0 = 1.4086e-04
Loss = 1.0330e-03, PNorm = 172.4175, GNorm = 0.1015, lr_0 = 1.4076e-04
Loss = 2.1760e-03, PNorm = 172.4202, GNorm = 0.1199, lr_0 = 1.4066e-04
Loss = 1.5871e-03, PNorm = 172.4227, GNorm = 0.0691, lr_0 = 1.4057e-04
Loss = 2.3780e-03, PNorm = 172.4251, GNorm = 0.1868, lr_0 = 1.4047e-04
Loss = 1.0203e-03, PNorm = 172.4280, GNorm = 0.1579, lr_0 = 1.4038e-04
Loss = 2.0125e-03, PNorm = 172.4315, GNorm = 0.0822, lr_0 = 1.4028e-04
Loss = 1.0760e-03, PNorm = 172.4342, GNorm = 0.0421, lr_0 = 1.4018e-04
Loss = 1.2261e-03, PNorm = 172.4356, GNorm = 0.0382, lr_0 = 1.4009e-04
Loss = 4.9975e-03, PNorm = 172.4376, GNorm = 1.0680, lr_0 = 1.3999e-04
Loss = 1.2745e-03, PNorm = 172.4395, GNorm = 0.0389, lr_0 = 1.3990e-04
Loss = 1.2955e-03, PNorm = 172.4424, GNorm = 0.1346, lr_0 = 1.3980e-04
Loss = 2.0980e-03, PNorm = 172.4426, GNorm = 0.0913, lr_0 = 1.3970e-04
Loss = 2.1393e-03, PNorm = 172.4451, GNorm = 0.0675, lr_0 = 1.3961e-04
Loss = 5.4936e-03, PNorm = 172.4470, GNorm = 0.0786, lr_0 = 1.3951e-04
Loss = 2.2609e-03, PNorm = 172.4494, GNorm = 0.1476, lr_0 = 1.3942e-04
Loss = 9.9275e-04, PNorm = 172.4519, GNorm = 0.1148, lr_0 = 1.3932e-04
Loss = 1.4964e-03, PNorm = 172.4541, GNorm = 0.4164, lr_0 = 1.3923e-04
Loss = 3.5589e-03, PNorm = 172.4549, GNorm = 0.0471, lr_0 = 1.3913e-04
Loss = 1.7927e-03, PNorm = 172.4585, GNorm = 0.0833, lr_0 = 1.3904e-04
Loss = 1.2123e-03, PNorm = 172.4631, GNorm = 0.1379, lr_0 = 1.3894e-04
Validation mae = 0.277814
Epoch 26
Loss = 1.4660e-03, PNorm = 172.4656, GNorm = 0.1799, lr_0 = 1.3884e-04
Loss = 8.9212e-04, PNorm = 172.4674, GNorm = 0.1023, lr_0 = 1.3875e-04
Loss = 1.7873e-03, PNorm = 172.4689, GNorm = 0.0807, lr_0 = 1.3865e-04
Loss = 2.2074e-03, PNorm = 172.4713, GNorm = 0.1552, lr_0 = 1.3856e-04
Loss = 1.1500e-03, PNorm = 172.4731, GNorm = 0.1242, lr_0 = 1.3846e-04
Loss = 1.4826e-03, PNorm = 172.4745, GNorm = 0.0601, lr_0 = 1.3837e-04
Loss = 1.2743e-03, PNorm = 172.4762, GNorm = 0.1646, lr_0 = 1.3828e-04
Loss = 1.6218e-03, PNorm = 172.4789, GNorm = 0.0793, lr_0 = 1.3818e-04
Loss = 8.5774e-04, PNorm = 172.4818, GNorm = 0.1360, lr_0 = 1.3809e-04
Loss = 1.3880e-03, PNorm = 172.4838, GNorm = 0.0994, lr_0 = 1.3799e-04
Loss = 1.4484e-03, PNorm = 172.4851, GNorm = 0.1086, lr_0 = 1.3790e-04
Loss = 1.1355e-03, PNorm = 172.4877, GNorm = 0.0294, lr_0 = 1.3780e-04
Loss = 9.3767e-04, PNorm = 172.4898, GNorm = 0.1375, lr_0 = 1.3771e-04
Loss = 1.2819e-03, PNorm = 172.4915, GNorm = 0.0420, lr_0 = 1.3761e-04
Loss = 1.2576e-03, PNorm = 172.4929, GNorm = 0.0733, lr_0 = 1.3752e-04
Loss = 1.1668e-03, PNorm = 172.4956, GNorm = 0.0344, lr_0 = 1.3742e-04
Loss = 1.0927e-03, PNorm = 172.4984, GNorm = 0.0536, lr_0 = 1.3733e-04
Loss = 2.6058e-03, PNorm = 172.4998, GNorm = 0.1122, lr_0 = 1.3724e-04
Loss = 1.1547e-03, PNorm = 172.5016, GNorm = 0.1344, lr_0 = 1.3714e-04
Loss = 1.3472e-03, PNorm = 172.5026, GNorm = 0.0791, lr_0 = 1.3705e-04
Loss = 3.9287e-03, PNorm = 172.5043, GNorm = 0.0422, lr_0 = 1.3695e-04
Loss = 8.3645e-04, PNorm = 172.5058, GNorm = 0.0673, lr_0 = 1.3686e-04
Loss = 1.5505e-03, PNorm = 172.5063, GNorm = 0.0909, lr_0 = 1.3677e-04
Loss = 4.3547e-03, PNorm = 172.5072, GNorm = 0.0878, lr_0 = 1.3667e-04
Loss = 1.5742e-03, PNorm = 172.5101, GNorm = 0.1725, lr_0 = 1.3658e-04
Loss = 9.8398e-04, PNorm = 172.5119, GNorm = 0.0352, lr_0 = 1.3649e-04
Loss = 1.1009e-03, PNorm = 172.5134, GNorm = 0.2600, lr_0 = 1.3639e-04
Loss = 1.8782e-03, PNorm = 172.5148, GNorm = 0.1549, lr_0 = 1.3630e-04
Loss = 9.6786e-04, PNorm = 172.5163, GNorm = 0.0830, lr_0 = 1.3621e-04
Loss = 1.4294e-03, PNorm = 172.5186, GNorm = 0.1451, lr_0 = 1.3611e-04
Loss = 1.3032e-03, PNorm = 172.5213, GNorm = 0.0784, lr_0 = 1.3602e-04
Loss = 2.1514e-03, PNorm = 172.5248, GNorm = 0.1513, lr_0 = 1.3593e-04
Loss = 9.3698e-04, PNorm = 172.5286, GNorm = 0.1874, lr_0 = 1.3583e-04
Loss = 1.6746e-03, PNorm = 172.5312, GNorm = 0.0381, lr_0 = 1.3574e-04
Loss = 1.1380e-03, PNorm = 172.5333, GNorm = 0.1780, lr_0 = 1.3565e-04
Loss = 8.0593e-04, PNorm = 172.5357, GNorm = 0.1135, lr_0 = 1.3555e-04
Loss = 1.0356e-03, PNorm = 172.5387, GNorm = 0.1650, lr_0 = 1.3546e-04
Loss = 1.0170e-03, PNorm = 172.5394, GNorm = 0.0506, lr_0 = 1.3537e-04
Loss = 1.7249e-03, PNorm = 172.5405, GNorm = 0.0646, lr_0 = 1.3528e-04
Loss = 1.0100e-03, PNorm = 172.5426, GNorm = 0.1285, lr_0 = 1.3518e-04
Loss = 2.5078e-03, PNorm = 172.5445, GNorm = 0.1169, lr_0 = 1.3509e-04
Loss = 1.0131e-03, PNorm = 172.5469, GNorm = 0.1029, lr_0 = 1.3500e-04
Loss = 7.2265e-04, PNorm = 172.5475, GNorm = 0.0954, lr_0 = 1.3491e-04
Loss = 1.4082e-03, PNorm = 172.5474, GNorm = 0.1531, lr_0 = 1.3481e-04
Loss = 1.5661e-03, PNorm = 172.5485, GNorm = 0.1142, lr_0 = 1.3472e-04
Loss = 2.6668e-03, PNorm = 172.5498, GNorm = 0.1168, lr_0 = 1.3463e-04
Loss = 1.2536e-03, PNorm = 172.5531, GNorm = 0.0870, lr_0 = 1.3454e-04
Loss = 1.0131e-03, PNorm = 172.5539, GNorm = 0.0992, lr_0 = 1.3444e-04
Loss = 1.0929e-03, PNorm = 172.5554, GNorm = 0.0375, lr_0 = 1.3435e-04
Loss = 7.9028e-04, PNorm = 172.5581, GNorm = 0.0585, lr_0 = 1.3426e-04
Loss = 1.3935e-03, PNorm = 172.5602, GNorm = 0.0615, lr_0 = 1.3417e-04
Loss = 1.5143e-03, PNorm = 172.5622, GNorm = 0.0496, lr_0 = 1.3408e-04
Loss = 1.6745e-03, PNorm = 172.5640, GNorm = 0.0485, lr_0 = 1.3398e-04
Loss = 1.4283e-03, PNorm = 172.5666, GNorm = 0.1225, lr_0 = 1.3389e-04
Loss = 1.5238e-03, PNorm = 172.5663, GNorm = 0.3710, lr_0 = 1.3380e-04
Loss = 2.0735e-03, PNorm = 172.5680, GNorm = 0.0710, lr_0 = 1.3371e-04
Loss = 1.7113e-03, PNorm = 172.5718, GNorm = 0.1465, lr_0 = 1.3362e-04
Loss = 2.2392e-03, PNorm = 172.5750, GNorm = 0.0914, lr_0 = 1.3353e-04
Loss = 9.8210e-04, PNorm = 172.5756, GNorm = 0.0935, lr_0 = 1.3343e-04
Loss = 1.6660e-03, PNorm = 172.5764, GNorm = 0.1773, lr_0 = 1.3334e-04
Loss = 1.0478e-03, PNorm = 172.5775, GNorm = 0.1347, lr_0 = 1.3325e-04
Loss = 1.6065e-03, PNorm = 172.5804, GNorm = 0.1018, lr_0 = 1.3316e-04
Loss = 7.6958e-04, PNorm = 172.5825, GNorm = 0.0710, lr_0 = 1.3307e-04
Loss = 1.6980e-03, PNorm = 172.5843, GNorm = 0.2264, lr_0 = 1.3298e-04
Loss = 8.5744e-04, PNorm = 172.5852, GNorm = 0.1663, lr_0 = 1.3289e-04
Loss = 2.7129e-03, PNorm = 172.5886, GNorm = 0.7321, lr_0 = 1.3280e-04
Loss = 2.3990e-03, PNorm = 172.5910, GNorm = 0.1525, lr_0 = 1.3270e-04
Loss = 2.2002e-03, PNorm = 172.5926, GNorm = 0.2195, lr_0 = 1.3261e-04
Loss = 1.5170e-03, PNorm = 172.5939, GNorm = 0.0687, lr_0 = 1.3252e-04
Loss = 2.7095e-03, PNorm = 172.5955, GNorm = 0.1931, lr_0 = 1.3243e-04
Loss = 1.4979e-03, PNorm = 172.5988, GNorm = 0.0425, lr_0 = 1.3234e-04
Loss = 3.1733e-03, PNorm = 172.6013, GNorm = 0.0410, lr_0 = 1.3225e-04
Loss = 1.7033e-03, PNorm = 172.6048, GNorm = 0.1196, lr_0 = 1.3216e-04
Loss = 9.4221e-04, PNorm = 172.6071, GNorm = 0.0331, lr_0 = 1.3207e-04
Loss = 1.7476e-03, PNorm = 172.6082, GNorm = 0.1840, lr_0 = 1.3198e-04
Loss = 1.5069e-03, PNorm = 172.6105, GNorm = 0.1626, lr_0 = 1.3189e-04
Loss = 2.1309e-03, PNorm = 172.6117, GNorm = 0.0707, lr_0 = 1.3180e-04
Loss = 2.2405e-03, PNorm = 172.6122, GNorm = 0.0596, lr_0 = 1.3171e-04
Loss = 7.3388e-04, PNorm = 172.6136, GNorm = 0.1315, lr_0 = 1.3162e-04
Loss = 1.3109e-03, PNorm = 172.6152, GNorm = 0.0803, lr_0 = 1.3153e-04
Loss = 9.1232e-04, PNorm = 172.6157, GNorm = 0.0835, lr_0 = 1.3144e-04
Loss = 1.1917e-03, PNorm = 172.6162, GNorm = 0.0642, lr_0 = 1.3135e-04
Loss = 2.2872e-03, PNorm = 172.6160, GNorm = 0.0804, lr_0 = 1.3126e-04
Loss = 8.9684e-04, PNorm = 172.6165, GNorm = 0.0667, lr_0 = 1.3117e-04
Loss = 3.9328e-03, PNorm = 172.6165, GNorm = 0.2578, lr_0 = 1.3108e-04
Loss = 1.1662e-03, PNorm = 172.6180, GNorm = 0.0905, lr_0 = 1.3099e-04
Loss = 2.4857e-03, PNorm = 172.6187, GNorm = 0.1254, lr_0 = 1.3090e-04
Loss = 1.3448e-03, PNorm = 172.6228, GNorm = 0.0331, lr_0 = 1.3081e-04
Loss = 1.7781e-03, PNorm = 172.6261, GNorm = 0.0594, lr_0 = 1.3072e-04
Loss = 9.9252e-04, PNorm = 172.6291, GNorm = 0.2207, lr_0 = 1.3063e-04
Loss = 1.6019e-03, PNorm = 172.6312, GNorm = 0.0383, lr_0 = 1.3054e-04
Loss = 9.0998e-04, PNorm = 172.6335, GNorm = 0.1079, lr_0 = 1.3045e-04
Loss = 1.9888e-03, PNorm = 172.6357, GNorm = 0.1172, lr_0 = 1.3036e-04
Loss = 2.0696e-03, PNorm = 172.6367, GNorm = 0.0409, lr_0 = 1.3027e-04
Loss = 2.8174e-03, PNorm = 172.6395, GNorm = 0.0309, lr_0 = 1.3018e-04
Loss = 1.1123e-03, PNorm = 172.6415, GNorm = 0.0400, lr_0 = 1.3009e-04
Loss = 7.4273e-04, PNorm = 172.6434, GNorm = 0.1633, lr_0 = 1.3000e-04
Loss = 1.5264e-03, PNorm = 172.6437, GNorm = 0.1128, lr_0 = 1.2992e-04
Loss = 1.8880e-03, PNorm = 172.6451, GNorm = 0.0274, lr_0 = 1.2983e-04
Loss = 3.5528e-03, PNorm = 172.6469, GNorm = 0.0520, lr_0 = 1.2974e-04
Loss = 2.5871e-03, PNorm = 172.6494, GNorm = 0.1399, lr_0 = 1.2965e-04
Loss = 9.7713e-04, PNorm = 172.6521, GNorm = 0.1051, lr_0 = 1.2956e-04
Loss = 1.3448e-03, PNorm = 172.6536, GNorm = 0.0434, lr_0 = 1.2947e-04
Loss = 1.1212e-03, PNorm = 172.6556, GNorm = 0.1221, lr_0 = 1.2938e-04
Loss = 2.1612e-03, PNorm = 172.6576, GNorm = 0.1237, lr_0 = 1.2929e-04
Loss = 1.5010e-03, PNorm = 172.6605, GNorm = 0.0358, lr_0 = 1.2921e-04
Loss = 7.6343e-04, PNorm = 172.6624, GNorm = 0.1217, lr_0 = 1.2912e-04
Loss = 6.7753e-04, PNorm = 172.6656, GNorm = 0.0995, lr_0 = 1.2903e-04
Loss = 8.5931e-04, PNorm = 172.6682, GNorm = 0.1045, lr_0 = 1.2894e-04
Loss = 1.5908e-03, PNorm = 172.6703, GNorm = 0.1730, lr_0 = 1.2885e-04
Loss = 6.8669e-04, PNorm = 172.6717, GNorm = 0.0631, lr_0 = 1.2876e-04
Loss = 1.5181e-03, PNorm = 172.6725, GNorm = 0.1132, lr_0 = 1.2867e-04
Loss = 1.1491e-03, PNorm = 172.6741, GNorm = 0.1483, lr_0 = 1.2859e-04
Loss = 2.2894e-03, PNorm = 172.6751, GNorm = 0.4777, lr_0 = 1.2850e-04
Loss = 8.8368e-04, PNorm = 172.6754, GNorm = 0.1415, lr_0 = 1.2841e-04
Loss = 1.3558e-03, PNorm = 172.6770, GNorm = 0.1315, lr_0 = 1.2832e-04
Loss = 1.2114e-03, PNorm = 172.6796, GNorm = 0.0322, lr_0 = 1.2823e-04
Loss = 1.3584e-03, PNorm = 172.6815, GNorm = 0.0661, lr_0 = 1.2815e-04
Loss = 9.8462e-04, PNorm = 172.6826, GNorm = 0.0842, lr_0 = 1.2806e-04
Loss = 3.0437e-03, PNorm = 172.6842, GNorm = 0.1610, lr_0 = 1.2797e-04
Validation mae = 0.277635
Epoch 27
Loss = 1.4658e-03, PNorm = 172.6848, GNorm = 0.0761, lr_0 = 1.2788e-04
Loss = 1.4514e-03, PNorm = 172.6859, GNorm = 0.1158, lr_0 = 1.2780e-04
Loss = 1.7108e-03, PNorm = 172.6873, GNorm = 0.0575, lr_0 = 1.2771e-04
Loss = 1.3530e-03, PNorm = 172.6885, GNorm = 0.0818, lr_0 = 1.2762e-04
Loss = 9.8616e-04, PNorm = 172.6901, GNorm = 0.1792, lr_0 = 1.2753e-04
Loss = 9.0853e-04, PNorm = 172.6927, GNorm = 0.0821, lr_0 = 1.2745e-04
Loss = 1.9715e-03, PNorm = 172.6943, GNorm = 0.0776, lr_0 = 1.2736e-04
Loss = 8.1726e-04, PNorm = 172.6956, GNorm = 0.0808, lr_0 = 1.2727e-04
Loss = 9.8447e-04, PNorm = 172.6971, GNorm = 0.0540, lr_0 = 1.2718e-04
Loss = 8.0129e-04, PNorm = 172.6992, GNorm = 0.1047, lr_0 = 1.2710e-04
Loss = 7.4669e-04, PNorm = 172.7011, GNorm = 0.0491, lr_0 = 1.2701e-04
Loss = 8.6183e-04, PNorm = 172.7033, GNorm = 0.0816, lr_0 = 1.2692e-04
Loss = 1.1711e-03, PNorm = 172.7044, GNorm = 0.0650, lr_0 = 1.2684e-04
Loss = 2.8273e-03, PNorm = 172.7066, GNorm = 0.0463, lr_0 = 1.2675e-04
Loss = 1.3331e-03, PNorm = 172.7080, GNorm = 0.0594, lr_0 = 1.2666e-04
Loss = 1.4240e-03, PNorm = 172.7080, GNorm = 0.1301, lr_0 = 1.2658e-04
Loss = 2.6344e-03, PNorm = 172.7082, GNorm = 0.1123, lr_0 = 1.2649e-04
Loss = 1.7679e-03, PNorm = 172.7100, GNorm = 0.0853, lr_0 = 1.2640e-04
Loss = 6.9663e-04, PNorm = 172.7109, GNorm = 0.0847, lr_0 = 1.2632e-04
Loss = 1.0900e-03, PNorm = 172.7112, GNorm = 0.0618, lr_0 = 1.2623e-04
Loss = 9.0067e-04, PNorm = 172.7116, GNorm = 0.1360, lr_0 = 1.2614e-04
Loss = 1.3402e-03, PNorm = 172.7131, GNorm = 0.0415, lr_0 = 1.2606e-04
Loss = 6.1232e-04, PNorm = 172.7139, GNorm = 0.0610, lr_0 = 1.2597e-04
Loss = 8.4445e-04, PNorm = 172.7157, GNorm = 0.0776, lr_0 = 1.2588e-04
Loss = 8.1032e-04, PNorm = 172.7166, GNorm = 0.0295, lr_0 = 1.2580e-04
Loss = 2.4907e-03, PNorm = 172.7173, GNorm = 0.1776, lr_0 = 1.2571e-04
Loss = 3.5740e-03, PNorm = 172.7182, GNorm = 0.1572, lr_0 = 1.2563e-04
Loss = 2.3358e-03, PNorm = 172.7198, GNorm = 0.1119, lr_0 = 1.2554e-04
Loss = 3.0780e-03, PNorm = 172.7209, GNorm = 0.2240, lr_0 = 1.2545e-04
Loss = 3.2700e-03, PNorm = 172.7219, GNorm = 0.1473, lr_0 = 1.2537e-04
Loss = 1.1761e-03, PNorm = 172.7226, GNorm = 0.0616, lr_0 = 1.2528e-04
Loss = 2.2139e-03, PNorm = 172.7237, GNorm = 0.0984, lr_0 = 1.2520e-04
Loss = 1.0680e-03, PNorm = 172.7264, GNorm = 0.1851, lr_0 = 1.2511e-04
Loss = 6.4654e-04, PNorm = 172.7285, GNorm = 0.0454, lr_0 = 1.2502e-04
Loss = 1.0494e-03, PNorm = 172.7300, GNorm = 0.0580, lr_0 = 1.2494e-04
Loss = 6.4193e-04, PNorm = 172.7314, GNorm = 0.0742, lr_0 = 1.2485e-04
Loss = 8.5504e-04, PNorm = 172.7341, GNorm = 0.0784, lr_0 = 1.2477e-04
Loss = 8.6819e-04, PNorm = 172.7355, GNorm = 0.1498, lr_0 = 1.2468e-04
Loss = 1.1769e-03, PNorm = 172.7388, GNorm = 0.1829, lr_0 = 1.2460e-04
Loss = 1.1614e-03, PNorm = 172.7415, GNorm = 0.1091, lr_0 = 1.2451e-04
Loss = 2.0368e-03, PNorm = 172.7433, GNorm = 0.1188, lr_0 = 1.2443e-04
Loss = 6.3180e-04, PNorm = 172.7466, GNorm = 0.0752, lr_0 = 1.2434e-04
Loss = 1.4533e-03, PNorm = 172.7472, GNorm = 0.0738, lr_0 = 1.2426e-04
Loss = 1.5841e-03, PNorm = 172.7485, GNorm = 0.1445, lr_0 = 1.2417e-04
Loss = 1.4671e-03, PNorm = 172.7502, GNorm = 0.2516, lr_0 = 1.2409e-04
Loss = 1.2966e-03, PNorm = 172.7510, GNorm = 0.0782, lr_0 = 1.2400e-04
Loss = 1.1819e-03, PNorm = 172.7531, GNorm = 0.0480, lr_0 = 1.2392e-04
Loss = 1.1040e-03, PNorm = 172.7547, GNorm = 0.1864, lr_0 = 1.2383e-04
Loss = 8.9523e-04, PNorm = 172.7565, GNorm = 0.0843, lr_0 = 1.2375e-04
Loss = 1.6433e-03, PNorm = 172.7579, GNorm = 0.1171, lr_0 = 1.2366e-04
Loss = 1.5673e-03, PNorm = 172.7603, GNorm = 0.1146, lr_0 = 1.2358e-04
Loss = 1.1419e-03, PNorm = 172.7624, GNorm = 0.0771, lr_0 = 1.2349e-04
Loss = 7.7041e-04, PNorm = 172.7639, GNorm = 0.1009, lr_0 = 1.2341e-04
Loss = 1.1956e-03, PNorm = 172.7650, GNorm = 0.0819, lr_0 = 1.2332e-04
Loss = 7.2211e-04, PNorm = 172.7662, GNorm = 0.0634, lr_0 = 1.2324e-04
Loss = 2.5122e-03, PNorm = 172.7675, GNorm = 0.1375, lr_0 = 1.2315e-04
Loss = 2.8273e-03, PNorm = 172.7689, GNorm = 0.1102, lr_0 = 1.2307e-04
Loss = 1.7082e-03, PNorm = 172.7704, GNorm = 0.0380, lr_0 = 1.2298e-04
Loss = 6.7368e-04, PNorm = 172.7720, GNorm = 0.2445, lr_0 = 1.2290e-04
Loss = 1.3137e-03, PNorm = 172.7735, GNorm = 0.1862, lr_0 = 1.2282e-04
Loss = 1.5649e-03, PNorm = 172.7763, GNorm = 0.1206, lr_0 = 1.2273e-04
Loss = 1.0137e-03, PNorm = 172.7778, GNorm = 0.1013, lr_0 = 1.2265e-04
Loss = 1.4801e-03, PNorm = 172.7795, GNorm = 0.1156, lr_0 = 1.2256e-04
Loss = 6.2189e-04, PNorm = 172.7811, GNorm = 0.0309, lr_0 = 1.2248e-04
Loss = 1.5269e-03, PNorm = 172.7835, GNorm = 0.0586, lr_0 = 1.2240e-04
Loss = 2.2528e-03, PNorm = 172.7862, GNorm = 0.0277, lr_0 = 1.2231e-04
Loss = 1.2644e-03, PNorm = 172.7884, GNorm = 0.2099, lr_0 = 1.2223e-04
Loss = 1.7267e-03, PNorm = 172.7901, GNorm = 0.0345, lr_0 = 1.2214e-04
Loss = 1.8441e-03, PNorm = 172.7914, GNorm = 0.0910, lr_0 = 1.2206e-04
Loss = 9.1147e-04, PNorm = 172.7926, GNorm = 0.1006, lr_0 = 1.2198e-04
Loss = 1.0733e-03, PNorm = 172.7944, GNorm = 0.1004, lr_0 = 1.2189e-04
Loss = 1.1106e-03, PNorm = 172.7968, GNorm = 0.0473, lr_0 = 1.2181e-04
Loss = 1.4261e-03, PNorm = 172.7982, GNorm = 0.1914, lr_0 = 1.2173e-04
Loss = 1.1946e-03, PNorm = 172.7997, GNorm = 0.0721, lr_0 = 1.2164e-04
Loss = 1.8797e-03, PNorm = 172.8011, GNorm = 0.0434, lr_0 = 1.2156e-04
Loss = 1.5881e-03, PNorm = 172.8021, GNorm = 0.0792, lr_0 = 1.2148e-04
Loss = 9.0515e-04, PNorm = 172.8027, GNorm = 0.1363, lr_0 = 1.2139e-04
Loss = 9.4415e-04, PNorm = 172.8042, GNorm = 0.0706, lr_0 = 1.2131e-04
Loss = 2.9793e-03, PNorm = 172.8074, GNorm = 0.0712, lr_0 = 1.2123e-04
Loss = 1.2677e-03, PNorm = 172.8097, GNorm = 0.0866, lr_0 = 1.2114e-04
Loss = 9.3318e-04, PNorm = 172.8111, GNorm = 0.1234, lr_0 = 1.2106e-04
Loss = 9.9587e-04, PNorm = 172.8121, GNorm = 0.0610, lr_0 = 1.2098e-04
Loss = 1.2384e-03, PNorm = 172.8134, GNorm = 0.0521, lr_0 = 1.2090e-04
Loss = 1.4705e-03, PNorm = 172.8145, GNorm = 0.0798, lr_0 = 1.2081e-04
Loss = 1.0337e-03, PNorm = 172.8152, GNorm = 0.0659, lr_0 = 1.2073e-04
Loss = 2.6316e-03, PNorm = 172.8174, GNorm = 0.1342, lr_0 = 1.2065e-04
Loss = 7.8810e-04, PNorm = 172.8198, GNorm = 0.0761, lr_0 = 1.2056e-04
Loss = 1.8004e-03, PNorm = 172.8225, GNorm = 0.0563, lr_0 = 1.2048e-04
Loss = 1.1267e-03, PNorm = 172.8259, GNorm = 0.1615, lr_0 = 1.2040e-04
Loss = 2.1135e-03, PNorm = 172.8274, GNorm = 0.0364, lr_0 = 1.2032e-04
Loss = 7.6731e-04, PNorm = 172.8287, GNorm = 0.0494, lr_0 = 1.2023e-04
Loss = 7.4101e-04, PNorm = 172.8312, GNorm = 0.0342, lr_0 = 1.2015e-04
Loss = 8.5956e-04, PNorm = 172.8325, GNorm = 0.1522, lr_0 = 1.2007e-04
Loss = 2.2976e-03, PNorm = 172.8332, GNorm = 0.0666, lr_0 = 1.1999e-04
Loss = 2.5259e-03, PNorm = 172.8345, GNorm = 0.7889, lr_0 = 1.1991e-04
Loss = 8.7242e-04, PNorm = 172.8372, GNorm = 0.1127, lr_0 = 1.1982e-04
Loss = 1.0141e-03, PNorm = 172.8398, GNorm = 0.1077, lr_0 = 1.1974e-04
Loss = 1.4212e-03, PNorm = 172.8429, GNorm = 0.1335, lr_0 = 1.1966e-04
Loss = 1.6902e-03, PNorm = 172.8437, GNorm = 0.0911, lr_0 = 1.1958e-04
Loss = 1.3711e-03, PNorm = 172.8443, GNorm = 0.1160, lr_0 = 1.1950e-04
Loss = 8.3722e-04, PNorm = 172.8458, GNorm = 0.0711, lr_0 = 1.1941e-04
Loss = 1.9280e-03, PNorm = 172.8472, GNorm = 0.1140, lr_0 = 1.1933e-04
Loss = 7.3706e-04, PNorm = 172.8488, GNorm = 0.1148, lr_0 = 1.1925e-04
Loss = 6.5233e-04, PNorm = 172.8498, GNorm = 0.0883, lr_0 = 1.1917e-04
Loss = 7.0940e-04, PNorm = 172.8506, GNorm = 0.0610, lr_0 = 1.1909e-04
Loss = 2.1391e-03, PNorm = 172.8525, GNorm = 0.1031, lr_0 = 1.1901e-04
Loss = 6.5334e-04, PNorm = 172.8544, GNorm = 0.0599, lr_0 = 1.1892e-04
Loss = 1.0438e-03, PNorm = 172.8563, GNorm = 0.2629, lr_0 = 1.1884e-04
Loss = 1.0049e-03, PNorm = 172.8579, GNorm = 0.0987, lr_0 = 1.1876e-04
Loss = 6.3538e-04, PNorm = 172.8594, GNorm = 0.0332, lr_0 = 1.1868e-04
Loss = 1.3975e-03, PNorm = 172.8607, GNorm = 0.0416, lr_0 = 1.1860e-04
Loss = 1.3133e-03, PNorm = 172.8616, GNorm = 0.0617, lr_0 = 1.1852e-04
Loss = 9.5676e-04, PNorm = 172.8625, GNorm = 0.1245, lr_0 = 1.1844e-04
Loss = 1.0777e-03, PNorm = 172.8635, GNorm = 0.1878, lr_0 = 1.1835e-04
Loss = 1.9201e-03, PNorm = 172.8649, GNorm = 0.2505, lr_0 = 1.1827e-04
Loss = 3.5752e-03, PNorm = 172.8662, GNorm = 0.1458, lr_0 = 1.1819e-04
Loss = 1.8961e-03, PNorm = 172.8677, GNorm = 0.1079, lr_0 = 1.1811e-04
Loss = 7.9855e-04, PNorm = 172.8698, GNorm = 0.1611, lr_0 = 1.1803e-04
Loss = 1.7794e-03, PNorm = 172.8708, GNorm = 0.0727, lr_0 = 1.1795e-04
Loss = 1.7230e-03, PNorm = 172.8720, GNorm = 0.1426, lr_0 = 1.1787e-04
Validation mae = 0.277650
Epoch 28
Loss = 5.9936e-04, PNorm = 172.8731, GNorm = 0.0894, lr_0 = 1.1779e-04
Loss = 6.4801e-04, PNorm = 172.8739, GNorm = 0.1095, lr_0 = 1.1771e-04
Loss = 9.1609e-04, PNorm = 172.8757, GNorm = 0.0573, lr_0 = 1.1763e-04
Loss = 8.3083e-04, PNorm = 172.8772, GNorm = 0.0744, lr_0 = 1.1755e-04
Loss = 5.7849e-04, PNorm = 172.8778, GNorm = 0.1006, lr_0 = 1.1747e-04
Loss = 2.8211e-03, PNorm = 172.8779, GNorm = 0.1578, lr_0 = 1.1739e-04
Loss = 1.1600e-03, PNorm = 172.8789, GNorm = 0.0587, lr_0 = 1.1730e-04
Loss = 7.7839e-04, PNorm = 172.8802, GNorm = 0.1293, lr_0 = 1.1722e-04
Loss = 1.2111e-03, PNorm = 172.8813, GNorm = 0.0920, lr_0 = 1.1714e-04
Loss = 1.3219e-03, PNorm = 172.8829, GNorm = 0.1599, lr_0 = 1.1706e-04
Loss = 9.3701e-04, PNorm = 172.8844, GNorm = 0.1240, lr_0 = 1.1698e-04
Loss = 8.6254e-04, PNorm = 172.8848, GNorm = 0.0473, lr_0 = 1.1690e-04
Loss = 1.1485e-03, PNorm = 172.8849, GNorm = 0.0800, lr_0 = 1.1682e-04
Loss = 2.6091e-03, PNorm = 172.8851, GNorm = 0.1673, lr_0 = 1.1674e-04
Loss = 6.1817e-04, PNorm = 172.8853, GNorm = 0.0602, lr_0 = 1.1666e-04
Loss = 6.3492e-04, PNorm = 172.8869, GNorm = 0.0622, lr_0 = 1.1658e-04
Loss = 5.2905e-04, PNorm = 172.8879, GNorm = 0.0486, lr_0 = 1.1650e-04
Loss = 8.6173e-04, PNorm = 172.8887, GNorm = 0.0460, lr_0 = 1.1642e-04
Loss = 1.5167e-03, PNorm = 172.8907, GNorm = 0.0585, lr_0 = 1.1634e-04
Loss = 7.5456e-04, PNorm = 172.8922, GNorm = 0.0342, lr_0 = 1.1626e-04
Loss = 1.4334e-03, PNorm = 172.8942, GNorm = 0.1046, lr_0 = 1.1618e-04
Loss = 1.2074e-03, PNorm = 172.8966, GNorm = 0.2090, lr_0 = 1.1611e-04
Loss = 1.6203e-03, PNorm = 172.8983, GNorm = 0.0512, lr_0 = 1.1603e-04
Loss = 2.2882e-03, PNorm = 172.8992, GNorm = 0.2950, lr_0 = 1.1595e-04
Loss = 6.9725e-04, PNorm = 172.8999, GNorm = 0.0704, lr_0 = 1.1587e-04
Loss = 2.0773e-03, PNorm = 172.9012, GNorm = 0.0483, lr_0 = 1.1579e-04
Loss = 1.1282e-03, PNorm = 172.9016, GNorm = 0.1555, lr_0 = 1.1571e-04
Loss = 7.4885e-04, PNorm = 172.9042, GNorm = 0.0913, lr_0 = 1.1563e-04
Loss = 2.1562e-03, PNorm = 172.9066, GNorm = 0.2533, lr_0 = 1.1555e-04
Loss = 7.5961e-04, PNorm = 172.9073, GNorm = 0.0681, lr_0 = 1.1547e-04
Loss = 9.6801e-04, PNorm = 172.9087, GNorm = 0.0902, lr_0 = 1.1539e-04
Loss = 5.9839e-04, PNorm = 172.9095, GNorm = 0.0884, lr_0 = 1.1531e-04
Loss = 1.2240e-03, PNorm = 172.9100, GNorm = 0.1313, lr_0 = 1.1523e-04
Loss = 5.1362e-04, PNorm = 172.9110, GNorm = 0.0602, lr_0 = 1.1515e-04
Loss = 9.5038e-04, PNorm = 172.9123, GNorm = 0.1248, lr_0 = 1.1508e-04
Loss = 7.2390e-04, PNorm = 172.9139, GNorm = 0.0329, lr_0 = 1.1500e-04
Loss = 5.6942e-04, PNorm = 172.9161, GNorm = 0.0637, lr_0 = 1.1492e-04
Loss = 1.2019e-03, PNorm = 172.9173, GNorm = 0.1087, lr_0 = 1.1484e-04
Loss = 1.2240e-03, PNorm = 172.9180, GNorm = 0.1224, lr_0 = 1.1476e-04
Loss = 1.3291e-03, PNorm = 172.9183, GNorm = 0.0593, lr_0 = 1.1468e-04
Loss = 1.1059e-03, PNorm = 172.9178, GNorm = 0.3018, lr_0 = 1.1460e-04
Loss = 8.1489e-04, PNorm = 172.9195, GNorm = 0.0688, lr_0 = 1.1452e-04
Loss = 1.6839e-03, PNorm = 172.9212, GNorm = 0.2387, lr_0 = 1.1445e-04
Loss = 1.7520e-03, PNorm = 172.9231, GNorm = 0.0643, lr_0 = 1.1437e-04
Loss = 1.7240e-03, PNorm = 172.9244, GNorm = 0.0568, lr_0 = 1.1429e-04
Loss = 1.1210e-03, PNorm = 172.9265, GNorm = 0.0821, lr_0 = 1.1421e-04
Loss = 7.1799e-04, PNorm = 172.9287, GNorm = 0.0796, lr_0 = 1.1413e-04
Loss = 3.6392e-03, PNorm = 172.9298, GNorm = 0.1445, lr_0 = 1.1405e-04
Loss = 8.4074e-04, PNorm = 172.9321, GNorm = 0.1000, lr_0 = 1.1398e-04
Loss = 1.7254e-03, PNorm = 172.9336, GNorm = 0.0760, lr_0 = 1.1390e-04
Loss = 1.2435e-03, PNorm = 172.9344, GNorm = 0.0448, lr_0 = 1.1382e-04
Loss = 1.8926e-03, PNorm = 172.9354, GNorm = 0.0431, lr_0 = 1.1374e-04
Loss = 5.5543e-04, PNorm = 172.9364, GNorm = 0.0986, lr_0 = 1.1366e-04
Loss = 1.3858e-03, PNorm = 172.9369, GNorm = 0.1268, lr_0 = 1.1359e-04
Loss = 5.3702e-04, PNorm = 172.9390, GNorm = 0.1147, lr_0 = 1.1351e-04
Loss = 7.0041e-04, PNorm = 172.9405, GNorm = 0.0442, lr_0 = 1.1343e-04
Loss = 8.1793e-04, PNorm = 172.9423, GNorm = 0.0614, lr_0 = 1.1335e-04
Loss = 7.2344e-04, PNorm = 172.9424, GNorm = 0.0744, lr_0 = 1.1328e-04
Loss = 1.5565e-03, PNorm = 172.9429, GNorm = 0.0571, lr_0 = 1.1320e-04
Loss = 7.0503e-04, PNorm = 172.9427, GNorm = 0.0795, lr_0 = 1.1312e-04
Loss = 1.7291e-03, PNorm = 172.9450, GNorm = 0.2404, lr_0 = 1.1304e-04
Loss = 7.4573e-04, PNorm = 172.9463, GNorm = 0.1363, lr_0 = 1.1297e-04
Loss = 3.3922e-03, PNorm = 172.9474, GNorm = 0.2570, lr_0 = 1.1289e-04
Loss = 1.0536e-03, PNorm = 172.9485, GNorm = 0.0357, lr_0 = 1.1281e-04
Loss = 6.4179e-04, PNorm = 172.9500, GNorm = 0.0738, lr_0 = 1.1273e-04
Loss = 1.8989e-03, PNorm = 172.9510, GNorm = 0.0852, lr_0 = 1.1266e-04
Loss = 1.4619e-03, PNorm = 172.9535, GNorm = 0.1845, lr_0 = 1.1258e-04
Loss = 1.6942e-03, PNorm = 172.9547, GNorm = 0.1299, lr_0 = 1.1250e-04
Loss = 7.1074e-04, PNorm = 172.9570, GNorm = 0.1733, lr_0 = 1.1243e-04
Loss = 1.3020e-03, PNorm = 172.9578, GNorm = 0.0302, lr_0 = 1.1235e-04
Loss = 1.6882e-03, PNorm = 172.9594, GNorm = 0.0186, lr_0 = 1.1227e-04
Loss = 7.3485e-04, PNorm = 172.9616, GNorm = 0.1472, lr_0 = 1.1219e-04
Loss = 1.8178e-03, PNorm = 172.9625, GNorm = 0.1161, lr_0 = 1.1212e-04
Loss = 1.4547e-03, PNorm = 172.9637, GNorm = 0.3319, lr_0 = 1.1204e-04
Loss = 2.0355e-03, PNorm = 172.9659, GNorm = 0.0797, lr_0 = 1.1196e-04
Loss = 8.0864e-04, PNorm = 172.9672, GNorm = 0.0443, lr_0 = 1.1189e-04
Loss = 1.9488e-03, PNorm = 172.9697, GNorm = 0.1456, lr_0 = 1.1181e-04
Loss = 4.4654e-03, PNorm = 172.9705, GNorm = 0.3562, lr_0 = 1.1173e-04
Loss = 1.2951e-03, PNorm = 172.9709, GNorm = 0.0809, lr_0 = 1.1166e-04
Loss = 6.7126e-04, PNorm = 172.9729, GNorm = 0.1009, lr_0 = 1.1158e-04
Loss = 1.5866e-03, PNorm = 172.9743, GNorm = 0.1344, lr_0 = 1.1150e-04
Loss = 5.3028e-04, PNorm = 172.9767, GNorm = 0.0380, lr_0 = 1.1143e-04
Loss = 1.6218e-03, PNorm = 172.9765, GNorm = 0.0528, lr_0 = 1.1135e-04
Loss = 1.3898e-03, PNorm = 172.9780, GNorm = 0.1349, lr_0 = 1.1128e-04
Loss = 1.8889e-03, PNorm = 172.9785, GNorm = 0.0614, lr_0 = 1.1120e-04
Loss = 6.9576e-04, PNorm = 172.9791, GNorm = 0.0743, lr_0 = 1.1112e-04
Loss = 1.2553e-03, PNorm = 172.9803, GNorm = 0.1395, lr_0 = 1.1105e-04
Loss = 1.7474e-03, PNorm = 172.9821, GNorm = 0.0670, lr_0 = 1.1097e-04
Loss = 8.9112e-04, PNorm = 172.9835, GNorm = 0.1860, lr_0 = 1.1089e-04
Loss = 1.3501e-03, PNorm = 172.9871, GNorm = 0.0676, lr_0 = 1.1082e-04
Loss = 1.7349e-03, PNorm = 172.9865, GNorm = 0.0803, lr_0 = 1.1074e-04
Loss = 7.7154e-04, PNorm = 172.9869, GNorm = 0.1320, lr_0 = 1.1067e-04
Loss = 1.3481e-03, PNorm = 172.9877, GNorm = 0.0512, lr_0 = 1.1059e-04
Loss = 8.1483e-04, PNorm = 172.9881, GNorm = 0.0703, lr_0 = 1.1052e-04
Loss = 6.4721e-04, PNorm = 172.9895, GNorm = 0.0520, lr_0 = 1.1044e-04
Loss = 5.5795e-04, PNorm = 172.9912, GNorm = 0.0488, lr_0 = 1.1036e-04
Loss = 1.7910e-03, PNorm = 172.9927, GNorm = 0.1083, lr_0 = 1.1029e-04
Loss = 2.6794e-03, PNorm = 172.9936, GNorm = 0.0277, lr_0 = 1.1021e-04
Loss = 1.1189e-03, PNorm = 172.9953, GNorm = 0.0563, lr_0 = 1.1014e-04
Loss = 2.5296e-03, PNorm = 172.9965, GNorm = 0.1024, lr_0 = 1.1006e-04
Loss = 1.2033e-03, PNorm = 172.9979, GNorm = 0.1151, lr_0 = 1.0999e-04
Loss = 6.8517e-04, PNorm = 172.9991, GNorm = 0.0795, lr_0 = 1.0991e-04
Loss = 7.8247e-04, PNorm = 173.0007, GNorm = 0.1120, lr_0 = 1.0984e-04
Loss = 2.8460e-03, PNorm = 173.0035, GNorm = 0.0523, lr_0 = 1.0976e-04
Loss = 1.1040e-03, PNorm = 173.0045, GNorm = 0.0605, lr_0 = 1.0969e-04
Loss = 1.5726e-03, PNorm = 173.0052, GNorm = 0.0816, lr_0 = 1.0961e-04
Loss = 1.4446e-03, PNorm = 173.0056, GNorm = 0.1642, lr_0 = 1.0954e-04
Loss = 8.2413e-04, PNorm = 173.0075, GNorm = 0.0595, lr_0 = 1.0946e-04
Loss = 6.9892e-04, PNorm = 173.0090, GNorm = 0.0930, lr_0 = 1.0939e-04
Loss = 3.1389e-03, PNorm = 173.0097, GNorm = 0.0995, lr_0 = 1.0931e-04
Loss = 1.3571e-03, PNorm = 173.0115, GNorm = 0.0680, lr_0 = 1.0924e-04
Loss = 6.5636e-04, PNorm = 173.0127, GNorm = 0.0428, lr_0 = 1.0916e-04
Loss = 1.5422e-03, PNorm = 173.0135, GNorm = 0.2221, lr_0 = 1.0909e-04
Loss = 7.0544e-04, PNorm = 173.0146, GNorm = 0.0644, lr_0 = 1.0901e-04
Loss = 1.4819e-03, PNorm = 173.0150, GNorm = 0.0850, lr_0 = 1.0894e-04
Loss = 1.4855e-03, PNorm = 173.0164, GNorm = 0.2909, lr_0 = 1.0886e-04
Loss = 6.1259e-04, PNorm = 173.0173, GNorm = 0.0402, lr_0 = 1.0879e-04
Loss = 1.0532e-03, PNorm = 173.0198, GNorm = 0.1025, lr_0 = 1.0871e-04
Loss = 3.7326e-03, PNorm = 173.0206, GNorm = 0.1135, lr_0 = 1.0864e-04
Loss = 6.7869e-04, PNorm = 173.0222, GNorm = 0.0802, lr_0 = 1.0856e-04
Validation mae = 0.277655
Epoch 29
Loss = 1.1355e-03, PNorm = 173.0224, GNorm = 0.0373, lr_0 = 1.0849e-04
Loss = 7.3495e-04, PNorm = 173.0223, GNorm = 0.1321, lr_0 = 1.0841e-04
Loss = 1.1334e-03, PNorm = 173.0239, GNorm = 0.0539, lr_0 = 1.0834e-04
Loss = 1.8737e-03, PNorm = 173.0258, GNorm = 0.0531, lr_0 = 1.0827e-04
Loss = 5.5578e-04, PNorm = 173.0279, GNorm = 0.1251, lr_0 = 1.0819e-04
Loss = 6.2890e-04, PNorm = 173.0302, GNorm = 0.1409, lr_0 = 1.0812e-04
Loss = 5.5287e-04, PNorm = 173.0311, GNorm = 0.0529, lr_0 = 1.0804e-04
Loss = 1.6829e-03, PNorm = 173.0312, GNorm = 0.1339, lr_0 = 1.0797e-04
Loss = 7.3341e-04, PNorm = 173.0310, GNorm = 0.0414, lr_0 = 1.0790e-04
Loss = 9.0508e-04, PNorm = 173.0315, GNorm = 0.0845, lr_0 = 1.0782e-04
Loss = 7.6934e-04, PNorm = 173.0320, GNorm = 0.0231, lr_0 = 1.0775e-04
Loss = 1.1195e-03, PNorm = 173.0322, GNorm = 0.0807, lr_0 = 1.0767e-04
Loss = 1.7948e-03, PNorm = 173.0322, GNorm = 0.3776, lr_0 = 1.0760e-04
Loss = 6.1102e-04, PNorm = 173.0328, GNorm = 0.1495, lr_0 = 1.0753e-04
Loss = 1.7682e-03, PNorm = 173.0340, GNorm = 0.0545, lr_0 = 1.0745e-04
Loss = 7.6804e-04, PNorm = 173.0359, GNorm = 0.1036, lr_0 = 1.0738e-04
Loss = 9.3215e-04, PNorm = 173.0379, GNorm = 0.0832, lr_0 = 1.0731e-04
Loss = 6.6970e-04, PNorm = 173.0393, GNorm = 0.0463, lr_0 = 1.0723e-04
Loss = 1.3997e-03, PNorm = 173.0390, GNorm = 0.0543, lr_0 = 1.0716e-04
Loss = 1.3448e-03, PNorm = 173.0405, GNorm = 0.0445, lr_0 = 1.0709e-04
Loss = 1.2641e-03, PNorm = 173.0418, GNorm = 0.0635, lr_0 = 1.0701e-04
Loss = 1.9408e-03, PNorm = 173.0438, GNorm = 0.1624, lr_0 = 1.0694e-04
Loss = 9.1736e-04, PNorm = 173.0448, GNorm = 0.0730, lr_0 = 1.0687e-04
Loss = 6.8696e-04, PNorm = 173.0461, GNorm = 0.1468, lr_0 = 1.0679e-04
Loss = 1.7283e-03, PNorm = 173.0482, GNorm = 0.0614, lr_0 = 1.0672e-04
Loss = 1.8100e-03, PNorm = 173.0504, GNorm = 0.0697, lr_0 = 1.0665e-04
Loss = 6.6361e-04, PNorm = 173.0522, GNorm = 0.0767, lr_0 = 1.0657e-04
Loss = 1.0722e-03, PNorm = 173.0526, GNorm = 0.1123, lr_0 = 1.0650e-04
Loss = 1.5512e-03, PNorm = 173.0539, GNorm = 0.0537, lr_0 = 1.0643e-04
Loss = 1.3154e-03, PNorm = 173.0549, GNorm = 0.1604, lr_0 = 1.0635e-04
Loss = 1.2651e-03, PNorm = 173.0563, GNorm = 0.0518, lr_0 = 1.0628e-04
Loss = 6.8367e-04, PNorm = 173.0583, GNorm = 0.0731, lr_0 = 1.0621e-04
Loss = 1.1991e-03, PNorm = 173.0596, GNorm = 0.0907, lr_0 = 1.0614e-04
Loss = 1.8159e-03, PNorm = 173.0608, GNorm = 0.1015, lr_0 = 1.0606e-04
Loss = 5.3433e-04, PNorm = 173.0621, GNorm = 0.1470, lr_0 = 1.0599e-04
Loss = 5.9248e-04, PNorm = 173.0632, GNorm = 0.0307, lr_0 = 1.0592e-04
Loss = 2.1693e-03, PNorm = 173.0634, GNorm = 0.1438, lr_0 = 1.0585e-04
Loss = 1.8588e-03, PNorm = 173.0650, GNorm = 0.0283, lr_0 = 1.0577e-04
Loss = 1.6836e-03, PNorm = 173.0658, GNorm = 0.1294, lr_0 = 1.0570e-04
Loss = 1.4601e-03, PNorm = 173.0674, GNorm = 0.1741, lr_0 = 1.0563e-04
Loss = 7.1030e-04, PNorm = 173.0684, GNorm = 0.0977, lr_0 = 1.0556e-04
Loss = 7.3812e-04, PNorm = 173.0702, GNorm = 0.2350, lr_0 = 1.0548e-04
Loss = 1.7707e-03, PNorm = 173.0718, GNorm = 0.0895, lr_0 = 1.0541e-04
Loss = 5.5622e-04, PNorm = 173.0726, GNorm = 0.1437, lr_0 = 1.0534e-04
Loss = 1.6067e-03, PNorm = 173.0739, GNorm = 0.1047, lr_0 = 1.0527e-04
Loss = 2.2039e-03, PNorm = 173.0754, GNorm = 0.0557, lr_0 = 1.0519e-04
Loss = 1.3652e-03, PNorm = 173.0755, GNorm = 0.1127, lr_0 = 1.0512e-04
Loss = 7.1791e-04, PNorm = 173.0761, GNorm = 0.0539, lr_0 = 1.0505e-04
Loss = 8.9756e-04, PNorm = 173.0769, GNorm = 0.0396, lr_0 = 1.0498e-04
Loss = 5.5388e-04, PNorm = 173.0775, GNorm = 0.0545, lr_0 = 1.0491e-04
Loss = 6.2802e-04, PNorm = 173.0793, GNorm = 0.2167, lr_0 = 1.0483e-04
Loss = 4.0372e-03, PNorm = 173.0795, GNorm = 0.1941, lr_0 = 1.0476e-04
Loss = 1.4958e-03, PNorm = 173.0826, GNorm = 0.1326, lr_0 = 1.0469e-04
Loss = 5.3332e-04, PNorm = 173.0838, GNorm = 0.1427, lr_0 = 1.0462e-04
Loss = 8.7741e-04, PNorm = 173.0855, GNorm = 0.0511, lr_0 = 1.0455e-04
Loss = 8.6747e-04, PNorm = 173.0862, GNorm = 0.0533, lr_0 = 1.0448e-04
Loss = 1.4800e-03, PNorm = 173.0873, GNorm = 0.0517, lr_0 = 1.0440e-04
Loss = 9.4212e-04, PNorm = 173.0867, GNorm = 0.0864, lr_0 = 1.0433e-04
Loss = 1.3855e-03, PNorm = 173.0884, GNorm = 0.0689, lr_0 = 1.0426e-04
Loss = 5.0846e-04, PNorm = 173.0904, GNorm = 0.0975, lr_0 = 1.0419e-04
Loss = 5.1491e-04, PNorm = 173.0925, GNorm = 0.0582, lr_0 = 1.0412e-04
Loss = 1.5470e-03, PNorm = 173.0940, GNorm = 0.0668, lr_0 = 1.0405e-04
Loss = 7.8709e-04, PNorm = 173.0949, GNorm = 0.0349, lr_0 = 1.0398e-04
Loss = 7.9991e-04, PNorm = 173.0966, GNorm = 0.0764, lr_0 = 1.0391e-04
Loss = 1.7384e-03, PNorm = 173.0984, GNorm = 0.2427, lr_0 = 1.0383e-04
Loss = 9.1616e-04, PNorm = 173.0998, GNorm = 0.0570, lr_0 = 1.0376e-04
Loss = 6.0692e-04, PNorm = 173.1001, GNorm = 0.0868, lr_0 = 1.0369e-04
Loss = 7.8892e-04, PNorm = 173.1008, GNorm = 0.0305, lr_0 = 1.0362e-04
Loss = 1.0600e-03, PNorm = 173.1023, GNorm = 0.0458, lr_0 = 1.0355e-04
Loss = 8.0941e-04, PNorm = 173.1033, GNorm = 0.0607, lr_0 = 1.0348e-04
Loss = 1.0797e-03, PNorm = 173.1042, GNorm = 0.0876, lr_0 = 1.0341e-04
Loss = 8.1348e-04, PNorm = 173.1057, GNorm = 0.1097, lr_0 = 1.0334e-04
Loss = 2.2311e-03, PNorm = 173.1070, GNorm = 0.1639, lr_0 = 1.0327e-04
Loss = 3.0649e-03, PNorm = 173.1095, GNorm = 0.1388, lr_0 = 1.0320e-04
Loss = 1.1039e-03, PNorm = 173.1122, GNorm = 0.0453, lr_0 = 1.0312e-04
Loss = 7.8060e-04, PNorm = 173.1135, GNorm = 0.0921, lr_0 = 1.0305e-04
Loss = 1.2166e-03, PNorm = 173.1155, GNorm = 0.0626, lr_0 = 1.0298e-04
Loss = 1.1279e-03, PNorm = 173.1167, GNorm = 0.0754, lr_0 = 1.0291e-04
Loss = 1.4517e-03, PNorm = 173.1177, GNorm = 0.0302, lr_0 = 1.0284e-04
Loss = 2.4567e-03, PNorm = 173.1182, GNorm = 0.0909, lr_0 = 1.0277e-04
Loss = 1.0415e-03, PNorm = 173.1204, GNorm = 0.0853, lr_0 = 1.0270e-04
Loss = 1.3164e-03, PNorm = 173.1214, GNorm = 0.0450, lr_0 = 1.0263e-04
Loss = 7.2675e-04, PNorm = 173.1219, GNorm = 0.1148, lr_0 = 1.0256e-04
Loss = 1.1861e-03, PNorm = 173.1224, GNorm = 0.1356, lr_0 = 1.0249e-04
Loss = 8.7464e-04, PNorm = 173.1227, GNorm = 0.0627, lr_0 = 1.0242e-04
Loss = 6.1999e-04, PNorm = 173.1241, GNorm = 0.0756, lr_0 = 1.0235e-04
Loss = 7.8042e-04, PNorm = 173.1264, GNorm = 0.0915, lr_0 = 1.0228e-04
Loss = 6.5038e-04, PNorm = 173.1286, GNorm = 0.1145, lr_0 = 1.0221e-04
Loss = 2.4853e-03, PNorm = 173.1302, GNorm = 0.2688, lr_0 = 1.0214e-04
Loss = 4.8493e-04, PNorm = 173.1313, GNorm = 0.0537, lr_0 = 1.0207e-04
Loss = 6.2559e-04, PNorm = 173.1321, GNorm = 0.0462, lr_0 = 1.0200e-04
Loss = 7.9306e-04, PNorm = 173.1319, GNorm = 0.0322, lr_0 = 1.0193e-04
Loss = 5.3520e-04, PNorm = 173.1327, GNorm = 0.0665, lr_0 = 1.0186e-04
Loss = 1.8180e-03, PNorm = 173.1341, GNorm = 0.0628, lr_0 = 1.0179e-04
Loss = 1.2951e-03, PNorm = 173.1355, GNorm = 0.0429, lr_0 = 1.0172e-04
Loss = 1.1924e-03, PNorm = 173.1365, GNorm = 0.2225, lr_0 = 1.0165e-04
Loss = 1.9765e-03, PNorm = 173.1389, GNorm = 0.0674, lr_0 = 1.0158e-04
Loss = 8.3594e-04, PNorm = 173.1413, GNorm = 0.1009, lr_0 = 1.0151e-04
Loss = 5.1857e-04, PNorm = 173.1421, GNorm = 0.0501, lr_0 = 1.0144e-04
Loss = 4.7221e-03, PNorm = 173.1415, GNorm = 0.2317, lr_0 = 1.0137e-04
Loss = 6.1538e-04, PNorm = 173.1420, GNorm = 0.1739, lr_0 = 1.0130e-04
Loss = 1.0606e-03, PNorm = 173.1429, GNorm = 0.0385, lr_0 = 1.0123e-04
Loss = 1.5820e-03, PNorm = 173.1448, GNorm = 0.0998, lr_0 = 1.0116e-04
Loss = 7.4583e-04, PNorm = 173.1462, GNorm = 0.0824, lr_0 = 1.0110e-04
Loss = 8.3911e-04, PNorm = 173.1468, GNorm = 0.0544, lr_0 = 1.0103e-04
Loss = 1.3214e-03, PNorm = 173.1471, GNorm = 0.1507, lr_0 = 1.0096e-04
Loss = 5.6384e-04, PNorm = 173.1475, GNorm = 0.1474, lr_0 = 1.0089e-04
Loss = 6.1790e-04, PNorm = 173.1491, GNorm = 0.0578, lr_0 = 1.0082e-04
Loss = 2.2714e-03, PNorm = 173.1506, GNorm = 0.0532, lr_0 = 1.0075e-04
Loss = 5.8392e-04, PNorm = 173.1529, GNorm = 0.0551, lr_0 = 1.0068e-04
Loss = 7.2323e-04, PNorm = 173.1550, GNorm = 0.0275, lr_0 = 1.0061e-04
Loss = 1.5586e-03, PNorm = 173.1564, GNorm = 0.0935, lr_0 = 1.0054e-04
Loss = 2.9117e-03, PNorm = 173.1568, GNorm = 0.0610, lr_0 = 1.0047e-04
Loss = 1.8522e-03, PNorm = 173.1558, GNorm = 0.2368, lr_0 = 1.0041e-04
Loss = 1.5334e-03, PNorm = 173.1561, GNorm = 0.1354, lr_0 = 1.0034e-04
Loss = 2.2488e-03, PNorm = 173.1570, GNorm = 0.0573, lr_0 = 1.0027e-04
Loss = 1.9648e-03, PNorm = 173.1582, GNorm = 0.0709, lr_0 = 1.0020e-04
Loss = 1.1233e-03, PNorm = 173.1604, GNorm = 0.1394, lr_0 = 1.0013e-04
Loss = 7.8113e-04, PNorm = 173.1629, GNorm = 0.0577, lr_0 = 1.0006e-04
Loss = 6.1410e-04, PNorm = 173.1644, GNorm = 0.0932, lr_0 = 1.0000e-04
Validation mae = 0.277720
Model 0 best validation mae = 0.277635 on epoch 26
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.273786
Ensemble test mae = 0.273786
Fold 2
Splitting data with seed 2
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=2200, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=2200, out_features=2200, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=2200, out_features=1, bias=True)
  )
)
Number of parameters = 9,352,201
Moving model to cuda
Epoch 0
Loss = 9.5663e-01, PNorm = 65.7664, GNorm = 2.2042, lr_0 = 1.0413e-04
Loss = 7.5833e-01, PNorm = 65.7786, GNorm = 2.2063, lr_0 = 1.0788e-04
Loss = 5.0519e-01, PNorm = 65.7920, GNorm = 1.7928, lr_0 = 1.1163e-04
Loss = 5.2665e-01, PNorm = 65.8053, GNorm = 2.7770, lr_0 = 1.1537e-04
Loss = 4.3923e-01, PNorm = 65.8160, GNorm = 2.0543, lr_0 = 1.1913e-04
Loss = 4.3448e-01, PNorm = 65.8238, GNorm = 2.2604, lr_0 = 1.2287e-04
Loss = 4.2692e-01, PNorm = 65.8320, GNorm = 2.4464, lr_0 = 1.2663e-04
Loss = 3.6689e-01, PNorm = 65.8407, GNorm = 2.0926, lr_0 = 1.3038e-04
Loss = 3.6840e-01, PNorm = 65.8499, GNorm = 2.4819, lr_0 = 1.3413e-04
Loss = 3.6876e-01, PNorm = 65.8585, GNorm = 1.9194, lr_0 = 1.3788e-04
Loss = 3.8554e-01, PNorm = 65.8679, GNorm = 2.5009, lr_0 = 1.4163e-04
Loss = 4.2568e-01, PNorm = 65.8780, GNorm = 2.2727, lr_0 = 1.4537e-04
Loss = 3.7435e-01, PNorm = 65.8901, GNorm = 2.7961, lr_0 = 1.4913e-04
Loss = 3.9358e-01, PNorm = 65.9042, GNorm = 2.8194, lr_0 = 1.5288e-04
Loss = 3.8343e-01, PNorm = 65.9146, GNorm = 1.8172, lr_0 = 1.5662e-04
Loss = 3.4704e-01, PNorm = 65.9245, GNorm = 1.7525, lr_0 = 1.6038e-04
Loss = 4.0692e-01, PNorm = 65.9372, GNorm = 3.2299, lr_0 = 1.6412e-04
Loss = 4.1740e-01, PNorm = 65.9513, GNorm = 2.7023, lr_0 = 1.6788e-04
Loss = 3.5951e-01, PNorm = 65.9649, GNorm = 2.5277, lr_0 = 1.7163e-04
Loss = 3.4947e-01, PNorm = 65.9759, GNorm = 1.6995, lr_0 = 1.7538e-04
Loss = 3.0707e-01, PNorm = 65.9895, GNorm = 1.4864, lr_0 = 1.7913e-04
Loss = 4.1315e-01, PNorm = 66.0019, GNorm = 1.6501, lr_0 = 1.8288e-04
Loss = 3.1474e-01, PNorm = 66.0178, GNorm = 1.6078, lr_0 = 1.8662e-04
Loss = 3.5290e-01, PNorm = 66.0314, GNorm = 1.6041, lr_0 = 1.9038e-04
Loss = 2.9866e-01, PNorm = 66.0434, GNorm = 1.5075, lr_0 = 1.9413e-04
Loss = 3.2193e-01, PNorm = 66.0593, GNorm = 1.5339, lr_0 = 1.9788e-04
Loss = 3.3183e-01, PNorm = 66.0731, GNorm = 2.1818, lr_0 = 2.0163e-04
Loss = 2.8125e-01, PNorm = 66.0883, GNorm = 2.1116, lr_0 = 2.0537e-04
Loss = 3.3575e-01, PNorm = 66.1026, GNorm = 2.7213, lr_0 = 2.0913e-04
Loss = 3.4972e-01, PNorm = 66.1214, GNorm = 2.2642, lr_0 = 2.1288e-04
Loss = 3.0815e-01, PNorm = 66.1386, GNorm = 1.5549, lr_0 = 2.1663e-04
Loss = 3.2528e-01, PNorm = 66.1566, GNorm = 2.2253, lr_0 = 2.2038e-04
Loss = 3.5027e-01, PNorm = 66.1750, GNorm = 2.2235, lr_0 = 2.2412e-04
Loss = 2.5325e-01, PNorm = 66.1927, GNorm = 1.3140, lr_0 = 2.2787e-04
Loss = 3.1224e-01, PNorm = 66.2078, GNorm = 1.6328, lr_0 = 2.3163e-04
Loss = 3.1027e-01, PNorm = 66.2256, GNorm = 2.1025, lr_0 = 2.3538e-04
Loss = 3.1038e-01, PNorm = 66.2460, GNorm = 1.6166, lr_0 = 2.3913e-04
Loss = 2.5320e-01, PNorm = 66.2644, GNorm = 1.8175, lr_0 = 2.4288e-04
Loss = 3.2274e-01, PNorm = 66.2831, GNorm = 1.7285, lr_0 = 2.4662e-04
Loss = 3.2069e-01, PNorm = 66.3048, GNorm = 1.4731, lr_0 = 2.5038e-04
Loss = 3.0488e-01, PNorm = 66.3260, GNorm = 1.1630, lr_0 = 2.5413e-04
Loss = 2.9660e-01, PNorm = 66.3462, GNorm = 2.0486, lr_0 = 2.5788e-04
Loss = 3.0845e-01, PNorm = 66.3683, GNorm = 1.3379, lr_0 = 2.6163e-04
Loss = 2.4966e-01, PNorm = 66.3911, GNorm = 1.4105, lr_0 = 2.6537e-04
Loss = 2.8999e-01, PNorm = 66.4128, GNorm = 2.1925, lr_0 = 2.6912e-04
Loss = 2.9327e-01, PNorm = 66.4342, GNorm = 1.2343, lr_0 = 2.7288e-04
Loss = 3.0777e-01, PNorm = 66.4595, GNorm = 1.2633, lr_0 = 2.7663e-04
Loss = 3.0586e-01, PNorm = 66.4872, GNorm = 1.7531, lr_0 = 2.8038e-04
Loss = 3.0256e-01, PNorm = 66.5103, GNorm = 1.1562, lr_0 = 2.8413e-04
Loss = 3.0852e-01, PNorm = 66.5400, GNorm = 1.4834, lr_0 = 2.8787e-04
Loss = 2.8444e-01, PNorm = 66.5646, GNorm = 1.4837, lr_0 = 2.9163e-04
Loss = 2.5912e-01, PNorm = 66.5923, GNorm = 1.0977, lr_0 = 2.9538e-04
Loss = 3.0604e-01, PNorm = 66.6186, GNorm = 1.7583, lr_0 = 2.9913e-04
Loss = 3.1810e-01, PNorm = 66.6444, GNorm = 1.3761, lr_0 = 3.0288e-04
Loss = 3.0840e-01, PNorm = 66.6746, GNorm = 1.4762, lr_0 = 3.0662e-04
Loss = 2.7570e-01, PNorm = 66.7059, GNorm = 2.6343, lr_0 = 3.1037e-04
Loss = 3.2021e-01, PNorm = 66.7314, GNorm = 1.1922, lr_0 = 3.1413e-04
Loss = 2.7869e-01, PNorm = 66.7622, GNorm = 0.9425, lr_0 = 3.1788e-04
Loss = 2.9102e-01, PNorm = 66.7905, GNorm = 2.2449, lr_0 = 3.2163e-04
Loss = 2.8572e-01, PNorm = 66.8253, GNorm = 0.9602, lr_0 = 3.2538e-04
Loss = 3.0022e-01, PNorm = 66.8534, GNorm = 1.7804, lr_0 = 3.2912e-04
Loss = 3.2114e-01, PNorm = 66.8798, GNorm = 1.2336, lr_0 = 3.3288e-04
Loss = 2.5372e-01, PNorm = 66.9134, GNorm = 1.2926, lr_0 = 3.3663e-04
Loss = 2.7660e-01, PNorm = 66.9370, GNorm = 1.3001, lr_0 = 3.4038e-04
Loss = 3.1176e-01, PNorm = 66.9704, GNorm = 1.5407, lr_0 = 3.4413e-04
Loss = 2.9859e-01, PNorm = 66.9999, GNorm = 1.2324, lr_0 = 3.4787e-04
Loss = 2.6561e-01, PNorm = 67.0319, GNorm = 1.1986, lr_0 = 3.5162e-04
Loss = 3.0977e-01, PNorm = 67.0638, GNorm = 1.4727, lr_0 = 3.5538e-04
Loss = 2.9560e-01, PNorm = 67.0993, GNorm = 1.2119, lr_0 = 3.5913e-04
Loss = 2.7142e-01, PNorm = 67.1286, GNorm = 0.9403, lr_0 = 3.6288e-04
Loss = 2.7271e-01, PNorm = 67.1640, GNorm = 1.2693, lr_0 = 3.6662e-04
Loss = 2.5504e-01, PNorm = 67.1928, GNorm = 0.9468, lr_0 = 3.7037e-04
Loss = 2.8546e-01, PNorm = 67.2242, GNorm = 1.0858, lr_0 = 3.7413e-04
Loss = 2.8322e-01, PNorm = 67.2615, GNorm = 1.2447, lr_0 = 3.7788e-04
Loss = 2.7853e-01, PNorm = 67.3027, GNorm = 1.5207, lr_0 = 3.8163e-04
Loss = 2.6458e-01, PNorm = 67.3379, GNorm = 0.9281, lr_0 = 3.8537e-04
Loss = 2.9492e-01, PNorm = 67.3752, GNorm = 0.9488, lr_0 = 3.8912e-04
Loss = 2.7005e-01, PNorm = 67.4155, GNorm = 1.3876, lr_0 = 3.9287e-04
Loss = 3.0080e-01, PNorm = 67.4534, GNorm = 0.9785, lr_0 = 3.9663e-04
Loss = 2.6110e-01, PNorm = 67.4917, GNorm = 1.0950, lr_0 = 4.0038e-04
Loss = 3.0305e-01, PNorm = 67.5305, GNorm = 1.5896, lr_0 = 4.0413e-04
Loss = 2.7809e-01, PNorm = 67.5764, GNorm = 1.0389, lr_0 = 4.0787e-04
Loss = 2.8030e-01, PNorm = 67.6131, GNorm = 1.1144, lr_0 = 4.1162e-04
Loss = 2.6520e-01, PNorm = 67.6553, GNorm = 1.2288, lr_0 = 4.1537e-04
Loss = 2.6701e-01, PNorm = 67.7004, GNorm = 1.0937, lr_0 = 4.1913e-04
Loss = 2.4174e-01, PNorm = 67.7385, GNorm = 1.2894, lr_0 = 4.2288e-04
Loss = 2.5411e-01, PNorm = 67.7778, GNorm = 1.1605, lr_0 = 4.2662e-04
Loss = 2.5120e-01, PNorm = 67.8157, GNorm = 0.8848, lr_0 = 4.3037e-04
Loss = 2.8723e-01, PNorm = 67.8569, GNorm = 0.8272, lr_0 = 4.3412e-04
Loss = 2.3823e-01, PNorm = 67.8998, GNorm = 0.9000, lr_0 = 4.3788e-04
Loss = 2.8813e-01, PNorm = 67.9466, GNorm = 1.3302, lr_0 = 4.4163e-04
Loss = 2.9922e-01, PNorm = 67.9935, GNorm = 1.1762, lr_0 = 4.4538e-04
Loss = 2.7887e-01, PNorm = 68.0405, GNorm = 1.2368, lr_0 = 4.4912e-04
Loss = 3.2479e-01, PNorm = 68.0929, GNorm = 1.4499, lr_0 = 4.5287e-04
Loss = 2.9495e-01, PNorm = 68.1369, GNorm = 1.2996, lr_0 = 4.5662e-04
Loss = 2.6206e-01, PNorm = 68.1852, GNorm = 1.0434, lr_0 = 4.6038e-04
Loss = 2.5173e-01, PNorm = 68.2378, GNorm = 0.8497, lr_0 = 4.6413e-04
Loss = 2.5541e-01, PNorm = 68.2841, GNorm = 1.2607, lr_0 = 4.6787e-04
Loss = 2.8189e-01, PNorm = 68.3315, GNorm = 1.1744, lr_0 = 4.7162e-04
Loss = 2.7431e-01, PNorm = 68.3859, GNorm = 0.9715, lr_0 = 4.7537e-04
Loss = 2.6905e-01, PNorm = 68.4387, GNorm = 1.3713, lr_0 = 4.7913e-04
Loss = 2.4857e-01, PNorm = 68.4844, GNorm = 0.9744, lr_0 = 4.8288e-04
Loss = 2.6577e-01, PNorm = 68.5413, GNorm = 1.2272, lr_0 = 4.8663e-04
Loss = 2.7775e-01, PNorm = 68.5938, GNorm = 1.2655, lr_0 = 4.9038e-04
Loss = 2.1339e-01, PNorm = 68.6541, GNorm = 1.0393, lr_0 = 4.9412e-04
Loss = 2.4721e-01, PNorm = 68.6981, GNorm = 1.1146, lr_0 = 4.9788e-04
Loss = 2.6730e-01, PNorm = 68.7507, GNorm = 1.0392, lr_0 = 5.0163e-04
Loss = 2.4575e-01, PNorm = 68.8078, GNorm = 1.0336, lr_0 = 5.0538e-04
Loss = 2.8618e-01, PNorm = 68.8591, GNorm = 0.8554, lr_0 = 5.0913e-04
Loss = 2.3391e-01, PNorm = 68.9181, GNorm = 1.1831, lr_0 = 5.1287e-04
Loss = 2.6103e-01, PNorm = 68.9729, GNorm = 0.9869, lr_0 = 5.1663e-04
Loss = 2.5500e-01, PNorm = 69.0343, GNorm = 1.2132, lr_0 = 5.2038e-04
Loss = 2.3264e-01, PNorm = 69.0866, GNorm = 1.0441, lr_0 = 5.2413e-04
Loss = 2.5617e-01, PNorm = 69.1474, GNorm = 0.9690, lr_0 = 5.2788e-04
Loss = 2.8163e-01, PNorm = 69.2063, GNorm = 0.9686, lr_0 = 5.3162e-04
Loss = 2.7212e-01, PNorm = 69.2641, GNorm = 1.0073, lr_0 = 5.3538e-04
Loss = 2.4191e-01, PNorm = 69.3358, GNorm = 1.1104, lr_0 = 5.3912e-04
Loss = 2.2361e-01, PNorm = 69.3938, GNorm = 1.1455, lr_0 = 5.4288e-04
Loss = 2.5353e-01, PNorm = 69.4561, GNorm = 1.1940, lr_0 = 5.4663e-04
Loss = 2.4731e-01, PNorm = 69.5268, GNorm = 0.8205, lr_0 = 5.5038e-04
Validation mae = 0.332563
Epoch 1
Loss = 1.8565e-01, PNorm = 69.5796, GNorm = 0.8604, lr_0 = 5.5413e-04
Loss = 1.9872e-01, PNorm = 69.6409, GNorm = 1.1836, lr_0 = 5.5787e-04
Loss = 1.9213e-01, PNorm = 69.6931, GNorm = 0.6791, lr_0 = 5.6163e-04
Loss = 1.6138e-01, PNorm = 69.7523, GNorm = 0.7969, lr_0 = 5.6538e-04
Loss = 1.5599e-01, PNorm = 69.8054, GNorm = 0.8420, lr_0 = 5.6913e-04
Loss = 1.6370e-01, PNorm = 69.8556, GNorm = 0.7274, lr_0 = 5.7288e-04
Loss = 1.8661e-01, PNorm = 69.9153, GNorm = 0.7254, lr_0 = 5.7662e-04
Loss = 1.6882e-01, PNorm = 69.9750, GNorm = 0.7646, lr_0 = 5.8038e-04
Loss = 1.7107e-01, PNorm = 70.0403, GNorm = 0.8253, lr_0 = 5.8413e-04
Loss = 1.7805e-01, PNorm = 70.1078, GNorm = 0.8082, lr_0 = 5.8788e-04
Loss = 1.7250e-01, PNorm = 70.1735, GNorm = 0.8373, lr_0 = 5.9163e-04
Loss = 2.1091e-01, PNorm = 70.2376, GNorm = 0.6316, lr_0 = 5.9538e-04
Loss = 1.7211e-01, PNorm = 70.3136, GNorm = 1.0041, lr_0 = 5.9913e-04
Loss = 1.5829e-01, PNorm = 70.3861, GNorm = 0.7340, lr_0 = 6.0288e-04
Loss = 1.6263e-01, PNorm = 70.4644, GNorm = 0.8027, lr_0 = 6.0663e-04
Loss = 1.6634e-01, PNorm = 70.5321, GNorm = 0.9002, lr_0 = 6.1038e-04
Loss = 1.8739e-01, PNorm = 70.6104, GNorm = 1.2140, lr_0 = 6.1413e-04
Loss = 1.8100e-01, PNorm = 70.6914, GNorm = 0.8266, lr_0 = 6.1788e-04
Loss = 1.6770e-01, PNorm = 70.7792, GNorm = 0.7462, lr_0 = 6.2163e-04
Loss = 1.6217e-01, PNorm = 70.8598, GNorm = 0.8595, lr_0 = 6.2538e-04
Loss = 1.8184e-01, PNorm = 70.9383, GNorm = 0.7341, lr_0 = 6.2913e-04
Loss = 1.7476e-01, PNorm = 71.0196, GNorm = 0.9447, lr_0 = 6.3288e-04
Loss = 1.7523e-01, PNorm = 71.0999, GNorm = 1.0016, lr_0 = 6.3663e-04
Loss = 1.7673e-01, PNorm = 71.1802, GNorm = 1.1663, lr_0 = 6.4038e-04
Loss = 1.8016e-01, PNorm = 71.2655, GNorm = 0.8594, lr_0 = 6.4413e-04
Loss = 1.8445e-01, PNorm = 71.3424, GNorm = 1.3849, lr_0 = 6.4788e-04
Loss = 1.9720e-01, PNorm = 71.4247, GNorm = 0.8223, lr_0 = 6.5163e-04
Loss = 1.8223e-01, PNorm = 71.5145, GNorm = 0.7340, lr_0 = 6.5538e-04
Loss = 1.6926e-01, PNorm = 71.6053, GNorm = 1.3931, lr_0 = 6.5913e-04
Loss = 1.8172e-01, PNorm = 71.6966, GNorm = 0.6867, lr_0 = 6.6288e-04
Loss = 1.9457e-01, PNorm = 71.7905, GNorm = 0.8578, lr_0 = 6.6663e-04
Loss = 1.8852e-01, PNorm = 71.8867, GNorm = 1.0063, lr_0 = 6.7038e-04
Loss = 1.8194e-01, PNorm = 71.9786, GNorm = 0.9272, lr_0 = 6.7413e-04
Loss = 1.8050e-01, PNorm = 72.0709, GNorm = 0.9903, lr_0 = 6.7788e-04
Loss = 1.8860e-01, PNorm = 72.1706, GNorm = 0.9457, lr_0 = 6.8163e-04
Loss = 1.9510e-01, PNorm = 72.2730, GNorm = 0.9153, lr_0 = 6.8538e-04
Loss = 1.8132e-01, PNorm = 72.3730, GNorm = 0.7742, lr_0 = 6.8913e-04
Loss = 1.8689e-01, PNorm = 72.4756, GNorm = 1.2028, lr_0 = 6.9288e-04
Loss = 2.1395e-01, PNorm = 72.5767, GNorm = 0.8892, lr_0 = 6.9663e-04
Loss = 1.9516e-01, PNorm = 72.6874, GNorm = 1.2062, lr_0 = 7.0038e-04
Loss = 1.8962e-01, PNorm = 72.7900, GNorm = 0.9077, lr_0 = 7.0413e-04
Loss = 1.9011e-01, PNorm = 72.9012, GNorm = 1.1913, lr_0 = 7.0788e-04
Loss = 1.8735e-01, PNorm = 73.0039, GNorm = 0.8522, lr_0 = 7.1163e-04
Loss = 1.7934e-01, PNorm = 73.1208, GNorm = 0.8620, lr_0 = 7.1538e-04
Loss = 1.9058e-01, PNorm = 73.2239, GNorm = 0.9447, lr_0 = 7.1913e-04
Loss = 2.1206e-01, PNorm = 73.3343, GNorm = 0.8142, lr_0 = 7.2288e-04
Loss = 1.9505e-01, PNorm = 73.4483, GNorm = 1.1253, lr_0 = 7.2663e-04
Loss = 2.0960e-01, PNorm = 73.5587, GNorm = 1.0949, lr_0 = 7.3038e-04
Loss = 1.9390e-01, PNorm = 73.6789, GNorm = 0.9950, lr_0 = 7.3413e-04
Loss = 1.7976e-01, PNorm = 73.7781, GNorm = 0.8645, lr_0 = 7.3788e-04
Loss = 1.7739e-01, PNorm = 73.8867, GNorm = 0.9486, lr_0 = 7.4163e-04
Loss = 1.7637e-01, PNorm = 73.9861, GNorm = 0.7188, lr_0 = 7.4538e-04
Loss = 1.9229e-01, PNorm = 74.0946, GNorm = 0.8378, lr_0 = 7.4913e-04
Loss = 2.1861e-01, PNorm = 74.2062, GNorm = 1.0931, lr_0 = 7.5288e-04
Loss = 2.0509e-01, PNorm = 74.3174, GNorm = 0.7910, lr_0 = 7.5663e-04
Loss = 1.9932e-01, PNorm = 74.4290, GNorm = 0.7707, lr_0 = 7.6038e-04
Loss = 2.3122e-01, PNorm = 74.5419, GNorm = 0.8026, lr_0 = 7.6413e-04
Loss = 2.1407e-01, PNorm = 74.6630, GNorm = 1.2131, lr_0 = 7.6788e-04
Loss = 1.9986e-01, PNorm = 74.7884, GNorm = 1.0061, lr_0 = 7.7163e-04
Loss = 1.9961e-01, PNorm = 74.9073, GNorm = 0.8726, lr_0 = 7.7538e-04
Loss = 1.8582e-01, PNorm = 75.0266, GNorm = 0.8337, lr_0 = 7.7913e-04
Loss = 2.2342e-01, PNorm = 75.1547, GNorm = 1.1576, lr_0 = 7.8288e-04
Loss = 1.9492e-01, PNorm = 75.2868, GNorm = 0.7224, lr_0 = 7.8663e-04
Loss = 2.1673e-01, PNorm = 75.4200, GNorm = 0.8228, lr_0 = 7.9038e-04
Loss = 1.9657e-01, PNorm = 75.5647, GNorm = 1.0618, lr_0 = 7.9413e-04
Loss = 2.1900e-01, PNorm = 75.6916, GNorm = 0.9777, lr_0 = 7.9788e-04
Loss = 2.2104e-01, PNorm = 75.8290, GNorm = 1.5329, lr_0 = 8.0163e-04
Loss = 2.2822e-01, PNorm = 75.9525, GNorm = 0.8734, lr_0 = 8.0538e-04
Loss = 2.3770e-01, PNorm = 76.1051, GNorm = 1.4263, lr_0 = 8.0913e-04
Loss = 2.0341e-01, PNorm = 76.2272, GNorm = 0.6192, lr_0 = 8.1288e-04
Loss = 2.2298e-01, PNorm = 76.3772, GNorm = 1.4132, lr_0 = 8.1663e-04
Loss = 2.1933e-01, PNorm = 76.5009, GNorm = 0.8188, lr_0 = 8.2038e-04
Loss = 1.9386e-01, PNorm = 76.6411, GNorm = 0.7802, lr_0 = 8.2413e-04
Loss = 2.3062e-01, PNorm = 76.7641, GNorm = 1.0912, lr_0 = 8.2788e-04
Loss = 1.9359e-01, PNorm = 76.9003, GNorm = 0.5934, lr_0 = 8.3163e-04
Loss = 1.8181e-01, PNorm = 77.0254, GNorm = 0.6679, lr_0 = 8.3538e-04
Loss = 2.1660e-01, PNorm = 77.1516, GNorm = 0.6758, lr_0 = 8.3913e-04
Loss = 2.0065e-01, PNorm = 77.2818, GNorm = 0.9680, lr_0 = 8.4288e-04
Loss = 1.9108e-01, PNorm = 77.4029, GNorm = 1.1782, lr_0 = 8.4663e-04
Loss = 1.8344e-01, PNorm = 77.5266, GNorm = 0.8601, lr_0 = 8.5038e-04
Loss = 1.9239e-01, PNorm = 77.6435, GNorm = 0.7190, lr_0 = 8.5413e-04
Loss = 2.1133e-01, PNorm = 77.7655, GNorm = 0.9385, lr_0 = 8.5788e-04
Loss = 1.9885e-01, PNorm = 77.8994, GNorm = 0.8943, lr_0 = 8.6163e-04
Loss = 2.6142e-01, PNorm = 78.0240, GNorm = 0.8062, lr_0 = 8.6538e-04
Loss = 1.8881e-01, PNorm = 78.1635, GNorm = 0.8287, lr_0 = 8.6913e-04
Loss = 2.2246e-01, PNorm = 78.3061, GNorm = 0.8517, lr_0 = 8.7288e-04
Loss = 2.1645e-01, PNorm = 78.4480, GNorm = 1.1203, lr_0 = 8.7663e-04
Loss = 2.1502e-01, PNorm = 78.6073, GNorm = 0.8729, lr_0 = 8.8038e-04
Loss = 2.0844e-01, PNorm = 78.7439, GNorm = 0.5883, lr_0 = 8.8413e-04
Loss = 2.1218e-01, PNorm = 78.8852, GNorm = 0.8693, lr_0 = 8.8788e-04
Loss = 2.1800e-01, PNorm = 79.0199, GNorm = 0.8574, lr_0 = 8.9163e-04
Loss = 2.0711e-01, PNorm = 79.1678, GNorm = 0.8063, lr_0 = 8.9538e-04
Loss = 1.8272e-01, PNorm = 79.3043, GNorm = 0.7318, lr_0 = 8.9913e-04
Loss = 1.9619e-01, PNorm = 79.4443, GNorm = 0.7339, lr_0 = 9.0288e-04
Loss = 1.9474e-01, PNorm = 79.5807, GNorm = 1.0513, lr_0 = 9.0663e-04
Loss = 2.0126e-01, PNorm = 79.7291, GNorm = 0.9084, lr_0 = 9.1038e-04
Loss = 2.1746e-01, PNorm = 79.8670, GNorm = 1.5061, lr_0 = 9.1413e-04
Loss = 1.8691e-01, PNorm = 80.0180, GNorm = 0.6774, lr_0 = 9.1788e-04
Loss = 2.2358e-01, PNorm = 80.1550, GNorm = 0.6899, lr_0 = 9.2163e-04
Loss = 2.0579e-01, PNorm = 80.3020, GNorm = 0.7011, lr_0 = 9.2538e-04
Loss = 2.0210e-01, PNorm = 80.4369, GNorm = 0.7103, lr_0 = 9.2913e-04
Loss = 2.3745e-01, PNorm = 80.5898, GNorm = 1.1590, lr_0 = 9.3288e-04
Loss = 2.3136e-01, PNorm = 80.7501, GNorm = 0.8989, lr_0 = 9.3663e-04
Loss = 2.2462e-01, PNorm = 80.9053, GNorm = 0.9056, lr_0 = 9.4038e-04
Loss = 2.0190e-01, PNorm = 81.0579, GNorm = 0.8846, lr_0 = 9.4413e-04
Loss = 1.8473e-01, PNorm = 81.1993, GNorm = 0.8632, lr_0 = 9.4788e-04
Loss = 1.9757e-01, PNorm = 81.3445, GNorm = 1.4164, lr_0 = 9.5163e-04
Loss = 1.9078e-01, PNorm = 81.4670, GNorm = 0.5852, lr_0 = 9.5538e-04
Loss = 2.3099e-01, PNorm = 81.6104, GNorm = 0.7425, lr_0 = 9.5913e-04
Loss = 2.3590e-01, PNorm = 81.7536, GNorm = 0.6103, lr_0 = 9.6288e-04
Loss = 2.1862e-01, PNorm = 81.9143, GNorm = 0.8549, lr_0 = 9.6663e-04
Loss = 2.0185e-01, PNorm = 82.0491, GNorm = 0.7163, lr_0 = 9.7038e-04
Loss = 2.0605e-01, PNorm = 82.2058, GNorm = 0.6563, lr_0 = 9.7413e-04
Loss = 2.0171e-01, PNorm = 82.3453, GNorm = 0.8038, lr_0 = 9.7788e-04
Loss = 2.1565e-01, PNorm = 82.4988, GNorm = 0.8012, lr_0 = 9.8163e-04
Loss = 2.1996e-01, PNorm = 82.6381, GNorm = 1.1634, lr_0 = 9.8537e-04
Loss = 2.0694e-01, PNorm = 82.8036, GNorm = 0.9097, lr_0 = 9.8912e-04
Loss = 1.8865e-01, PNorm = 82.9601, GNorm = 1.0045, lr_0 = 9.9288e-04
Loss = 2.2403e-01, PNorm = 83.1285, GNorm = 0.9788, lr_0 = 9.9663e-04
Loss = 2.0987e-01, PNorm = 83.2992, GNorm = 0.8125, lr_0 = 9.9993e-04
Validation mae = 0.309568
Epoch 2
Loss = 1.4792e-01, PNorm = 83.4572, GNorm = 1.5064, lr_0 = 9.9925e-04
Loss = 1.4455e-01, PNorm = 83.5762, GNorm = 0.4278, lr_0 = 9.9856e-04
Loss = 1.1788e-01, PNorm = 83.7000, GNorm = 0.6415, lr_0 = 9.9788e-04
Loss = 1.3913e-01, PNorm = 83.8126, GNorm = 0.7204, lr_0 = 9.9719e-04
Loss = 1.3516e-01, PNorm = 83.9370, GNorm = 0.5805, lr_0 = 9.9651e-04
Loss = 1.2333e-01, PNorm = 84.0451, GNorm = 0.5455, lr_0 = 9.9583e-04
Loss = 1.1704e-01, PNorm = 84.1567, GNorm = 0.7425, lr_0 = 9.9515e-04
Loss = 1.3611e-01, PNorm = 84.2493, GNorm = 0.9417, lr_0 = 9.9446e-04
Loss = 1.2700e-01, PNorm = 84.3723, GNorm = 1.0601, lr_0 = 9.9378e-04
Loss = 1.2544e-01, PNorm = 84.4804, GNorm = 0.6878, lr_0 = 9.9310e-04
Loss = 1.1981e-01, PNorm = 84.5856, GNorm = 0.5081, lr_0 = 9.9242e-04
Loss = 1.2575e-01, PNorm = 84.6858, GNorm = 0.7297, lr_0 = 9.9174e-04
Loss = 1.3062e-01, PNorm = 84.7907, GNorm = 0.5564, lr_0 = 9.9106e-04
Loss = 1.2871e-01, PNorm = 84.8968, GNorm = 0.7121, lr_0 = 9.9038e-04
Loss = 1.2725e-01, PNorm = 85.0177, GNorm = 0.6038, lr_0 = 9.8971e-04
Loss = 1.1903e-01, PNorm = 85.1283, GNorm = 0.5317, lr_0 = 9.8903e-04
Loss = 1.2481e-01, PNorm = 85.2471, GNorm = 0.7820, lr_0 = 9.8835e-04
Loss = 1.1134e-01, PNorm = 85.3590, GNorm = 0.6934, lr_0 = 9.8767e-04
Loss = 1.2938e-01, PNorm = 85.4762, GNorm = 0.5126, lr_0 = 9.8700e-04
Loss = 1.3592e-01, PNorm = 85.5798, GNorm = 0.8117, lr_0 = 9.8632e-04
Loss = 1.4625e-01, PNorm = 85.7172, GNorm = 0.9551, lr_0 = 9.8564e-04
Loss = 1.3737e-01, PNorm = 85.8454, GNorm = 0.6568, lr_0 = 9.8497e-04
Loss = 1.3747e-01, PNorm = 85.9793, GNorm = 0.5773, lr_0 = 9.8429e-04
Loss = 1.2903e-01, PNorm = 86.1009, GNorm = 0.6563, lr_0 = 9.8362e-04
Loss = 1.1749e-01, PNorm = 86.2141, GNorm = 0.7605, lr_0 = 9.8295e-04
Loss = 1.3518e-01, PNorm = 86.3423, GNorm = 0.6048, lr_0 = 9.8227e-04
Loss = 1.1591e-01, PNorm = 86.4627, GNorm = 0.5122, lr_0 = 9.8160e-04
Loss = 1.3815e-01, PNorm = 86.6003, GNorm = 0.7526, lr_0 = 9.8093e-04
Loss = 1.2105e-01, PNorm = 86.7053, GNorm = 0.7111, lr_0 = 9.8026e-04
Loss = 1.4328e-01, PNorm = 86.8374, GNorm = 0.5862, lr_0 = 9.7958e-04
Loss = 1.4388e-01, PNorm = 86.9649, GNorm = 0.6968, lr_0 = 9.7891e-04
Loss = 1.2758e-01, PNorm = 87.1101, GNorm = 0.7343, lr_0 = 9.7824e-04
Loss = 1.3348e-01, PNorm = 87.2299, GNorm = 0.7439, lr_0 = 9.7757e-04
Loss = 1.2731e-01, PNorm = 87.3661, GNorm = 0.8461, lr_0 = 9.7690e-04
Loss = 1.5390e-01, PNorm = 87.4761, GNorm = 0.5600, lr_0 = 9.7623e-04
Loss = 1.4692e-01, PNorm = 87.6240, GNorm = 0.7993, lr_0 = 9.7556e-04
Loss = 1.3903e-01, PNorm = 87.7556, GNorm = 0.6970, lr_0 = 9.7490e-04
Loss = 1.3350e-01, PNorm = 87.8844, GNorm = 0.4680, lr_0 = 9.7423e-04
Loss = 1.2656e-01, PNorm = 88.0118, GNorm = 0.5087, lr_0 = 9.7356e-04
Loss = 1.2119e-01, PNorm = 88.1290, GNorm = 0.9000, lr_0 = 9.7289e-04
Loss = 1.3680e-01, PNorm = 88.2446, GNorm = 0.6066, lr_0 = 9.7223e-04
Loss = 1.2392e-01, PNorm = 88.3595, GNorm = 1.0342, lr_0 = 9.7156e-04
Loss = 1.2383e-01, PNorm = 88.4846, GNorm = 0.8506, lr_0 = 9.7090e-04
Loss = 1.3011e-01, PNorm = 88.5954, GNorm = 0.5732, lr_0 = 9.7023e-04
Loss = 1.3133e-01, PNorm = 88.7275, GNorm = 0.7862, lr_0 = 9.6957e-04
Loss = 1.4606e-01, PNorm = 88.8547, GNorm = 0.7861, lr_0 = 9.6890e-04
Loss = 1.2686e-01, PNorm = 88.9773, GNorm = 0.8028, lr_0 = 9.6824e-04
Loss = 1.3475e-01, PNorm = 89.1006, GNorm = 0.4425, lr_0 = 9.6757e-04
Loss = 1.4408e-01, PNorm = 89.2367, GNorm = 0.4283, lr_0 = 9.6691e-04
Loss = 1.2832e-01, PNorm = 89.3812, GNorm = 0.6024, lr_0 = 9.6625e-04
Loss = 1.2473e-01, PNorm = 89.5110, GNorm = 0.5599, lr_0 = 9.6559e-04
Loss = 1.4251e-01, PNorm = 89.6376, GNorm = 0.3960, lr_0 = 9.6493e-04
Loss = 1.4401e-01, PNorm = 89.7631, GNorm = 0.5816, lr_0 = 9.6427e-04
Loss = 1.3992e-01, PNorm = 89.8917, GNorm = 0.7647, lr_0 = 9.6360e-04
Loss = 1.4799e-01, PNorm = 90.0178, GNorm = 1.1614, lr_0 = 9.6294e-04
Loss = 1.4680e-01, PNorm = 90.1524, GNorm = 0.3910, lr_0 = 9.6228e-04
Loss = 1.3116e-01, PNorm = 90.2792, GNorm = 0.7450, lr_0 = 9.6163e-04
Loss = 1.1812e-01, PNorm = 90.4006, GNorm = 0.6327, lr_0 = 9.6097e-04
Loss = 1.5280e-01, PNorm = 90.5225, GNorm = 0.6314, lr_0 = 9.6031e-04
Loss = 1.3254e-01, PNorm = 90.6575, GNorm = 0.5596, lr_0 = 9.5965e-04
Loss = 1.4183e-01, PNorm = 90.7914, GNorm = 0.6291, lr_0 = 9.5899e-04
Loss = 1.4158e-01, PNorm = 90.9111, GNorm = 0.4915, lr_0 = 9.5834e-04
Loss = 1.4106e-01, PNorm = 91.0407, GNorm = 0.4240, lr_0 = 9.5768e-04
Loss = 1.2548e-01, PNorm = 91.1732, GNorm = 0.5181, lr_0 = 9.5702e-04
Loss = 1.5386e-01, PNorm = 91.2962, GNorm = 0.8774, lr_0 = 9.5637e-04
Loss = 1.4317e-01, PNorm = 91.4166, GNorm = 0.4946, lr_0 = 9.5571e-04
Loss = 1.3130e-01, PNorm = 91.5514, GNorm = 0.5192, lr_0 = 9.5506e-04
Loss = 1.3744e-01, PNorm = 91.6723, GNorm = 0.5222, lr_0 = 9.5440e-04
Loss = 1.3092e-01, PNorm = 91.7915, GNorm = 0.9488, lr_0 = 9.5375e-04
Loss = 1.2573e-01, PNorm = 91.9139, GNorm = 0.5504, lr_0 = 9.5310e-04
Loss = 1.5910e-01, PNorm = 92.0291, GNorm = 0.6654, lr_0 = 9.5244e-04
Loss = 1.5268e-01, PNorm = 92.1625, GNorm = 0.7531, lr_0 = 9.5179e-04
Loss = 1.5529e-01, PNorm = 92.2895, GNorm = 0.8334, lr_0 = 9.5114e-04
Loss = 1.4554e-01, PNorm = 92.4167, GNorm = 1.0992, lr_0 = 9.5049e-04
Loss = 1.8151e-01, PNorm = 92.5646, GNorm = 0.7467, lr_0 = 9.4984e-04
Loss = 1.6107e-01, PNorm = 92.6994, GNorm = 0.9397, lr_0 = 9.4919e-04
Loss = 1.4406e-01, PNorm = 92.8320, GNorm = 0.8870, lr_0 = 9.4854e-04
Loss = 1.3310e-01, PNorm = 92.9681, GNorm = 0.6595, lr_0 = 9.4789e-04
Loss = 1.4102e-01, PNorm = 93.0975, GNorm = 0.6772, lr_0 = 9.4724e-04
Loss = 1.2960e-01, PNorm = 93.2227, GNorm = 0.7764, lr_0 = 9.4659e-04
Loss = 1.5473e-01, PNorm = 93.3575, GNorm = 0.7096, lr_0 = 9.4594e-04
Loss = 1.2930e-01, PNorm = 93.4905, GNorm = 0.6163, lr_0 = 9.4529e-04
Loss = 1.4529e-01, PNorm = 93.6153, GNorm = 0.4782, lr_0 = 9.4464e-04
Loss = 1.3450e-01, PNorm = 93.7401, GNorm = 0.5119, lr_0 = 9.4400e-04
Loss = 1.3544e-01, PNorm = 93.8632, GNorm = 0.5980, lr_0 = 9.4335e-04
Loss = 1.5115e-01, PNorm = 93.9893, GNorm = 0.8848, lr_0 = 9.4270e-04
Loss = 1.4750e-01, PNorm = 94.1160, GNorm = 1.1310, lr_0 = 9.4206e-04
Loss = 1.3922e-01, PNorm = 94.2354, GNorm = 0.8333, lr_0 = 9.4141e-04
Loss = 1.3563e-01, PNorm = 94.3580, GNorm = 1.1873, lr_0 = 9.4077e-04
Loss = 1.3256e-01, PNorm = 94.4674, GNorm = 0.8139, lr_0 = 9.4012e-04
Loss = 1.2163e-01, PNorm = 94.5804, GNorm = 0.6937, lr_0 = 9.3948e-04
Loss = 1.5240e-01, PNorm = 94.6836, GNorm = 0.7385, lr_0 = 9.3884e-04
Loss = 1.2689e-01, PNorm = 94.8070, GNorm = 0.3641, lr_0 = 9.3819e-04
Loss = 1.3712e-01, PNorm = 94.9196, GNorm = 0.9391, lr_0 = 9.3755e-04
Loss = 1.3710e-01, PNorm = 95.0453, GNorm = 0.6442, lr_0 = 9.3691e-04
Loss = 1.5773e-01, PNorm = 95.1570, GNorm = 1.2910, lr_0 = 9.3627e-04
Loss = 1.4452e-01, PNorm = 95.2919, GNorm = 0.7570, lr_0 = 9.3562e-04
Loss = 1.4508e-01, PNorm = 95.4219, GNorm = 0.6283, lr_0 = 9.3498e-04
Loss = 1.7299e-01, PNorm = 95.5537, GNorm = 1.1449, lr_0 = 9.3434e-04
Loss = 1.6258e-01, PNorm = 95.6995, GNorm = 0.6643, lr_0 = 9.3370e-04
Loss = 1.5599e-01, PNorm = 95.8467, GNorm = 0.6168, lr_0 = 9.3306e-04
Loss = 1.3905e-01, PNorm = 95.9876, GNorm = 0.7232, lr_0 = 9.3242e-04
Loss = 1.4050e-01, PNorm = 96.1150, GNorm = 0.7126, lr_0 = 9.3178e-04
Loss = 1.4487e-01, PNorm = 96.2364, GNorm = 1.1027, lr_0 = 9.3115e-04
Loss = 1.2877e-01, PNorm = 96.3720, GNorm = 0.5629, lr_0 = 9.3051e-04
Loss = 1.3026e-01, PNorm = 96.4969, GNorm = 0.5587, lr_0 = 9.2987e-04
Loss = 1.4971e-01, PNorm = 96.6096, GNorm = 0.9523, lr_0 = 9.2923e-04
Loss = 1.2455e-01, PNorm = 96.7379, GNorm = 1.1508, lr_0 = 9.2860e-04
Loss = 1.3331e-01, PNorm = 96.8378, GNorm = 0.8945, lr_0 = 9.2796e-04
Loss = 1.5674e-01, PNorm = 96.9563, GNorm = 1.6793, lr_0 = 9.2733e-04
Loss = 1.4570e-01, PNorm = 97.0789, GNorm = 0.5492, lr_0 = 9.2669e-04
Loss = 1.4263e-01, PNorm = 97.2105, GNorm = 0.6479, lr_0 = 9.2606e-04
Loss = 1.4002e-01, PNorm = 97.3299, GNorm = 0.6907, lr_0 = 9.2542e-04
Loss = 1.4726e-01, PNorm = 97.4569, GNorm = 0.9316, lr_0 = 9.2479e-04
Loss = 1.4291e-01, PNorm = 97.5815, GNorm = 0.9802, lr_0 = 9.2415e-04
Loss = 1.5076e-01, PNorm = 97.7088, GNorm = 0.9250, lr_0 = 9.2352e-04
Loss = 1.3390e-01, PNorm = 97.8253, GNorm = 0.6621, lr_0 = 9.2289e-04
Loss = 1.4510e-01, PNorm = 97.9477, GNorm = 0.5265, lr_0 = 9.2226e-04
Loss = 1.3468e-01, PNorm = 98.0888, GNorm = 0.4533, lr_0 = 9.2162e-04
Loss = 1.4301e-01, PNorm = 98.2085, GNorm = 0.7518, lr_0 = 9.2099e-04
Validation mae = 0.296130
Epoch 3
Loss = 7.4683e-02, PNorm = 98.3278, GNorm = 0.5061, lr_0 = 9.2036e-04
Loss = 9.1180e-02, PNorm = 98.4144, GNorm = 0.3872, lr_0 = 9.1973e-04
Loss = 9.3195e-02, PNorm = 98.5033, GNorm = 0.6750, lr_0 = 9.1910e-04
Loss = 8.1809e-02, PNorm = 98.5794, GNorm = 0.5040, lr_0 = 9.1847e-04
Loss = 8.5647e-02, PNorm = 98.6576, GNorm = 0.3889, lr_0 = 9.1784e-04
Loss = 8.3102e-02, PNorm = 98.7391, GNorm = 0.4374, lr_0 = 9.1721e-04
Loss = 8.8031e-02, PNorm = 98.8209, GNorm = 0.5752, lr_0 = 9.1658e-04
Loss = 7.9991e-02, PNorm = 98.8977, GNorm = 0.5413, lr_0 = 9.1596e-04
Loss = 7.9895e-02, PNorm = 98.9822, GNorm = 0.9521, lr_0 = 9.1533e-04
Loss = 8.3807e-02, PNorm = 99.0549, GNorm = 0.4706, lr_0 = 9.1470e-04
Loss = 8.8055e-02, PNorm = 99.1323, GNorm = 0.5489, lr_0 = 9.1408e-04
Loss = 9.1533e-02, PNorm = 99.2221, GNorm = 0.5402, lr_0 = 9.1345e-04
Loss = 8.8241e-02, PNorm = 99.3055, GNorm = 1.0401, lr_0 = 9.1282e-04
Loss = 7.5100e-02, PNorm = 99.3841, GNorm = 0.4833, lr_0 = 9.1220e-04
Loss = 7.0127e-02, PNorm = 99.4612, GNorm = 0.8920, lr_0 = 9.1157e-04
Loss = 7.7266e-02, PNorm = 99.5382, GNorm = 0.6286, lr_0 = 9.1095e-04
Loss = 8.2739e-02, PNorm = 99.6201, GNorm = 0.4328, lr_0 = 9.1032e-04
Loss = 9.0444e-02, PNorm = 99.6940, GNorm = 0.5385, lr_0 = 9.0970e-04
Loss = 9.3505e-02, PNorm = 99.7931, GNorm = 0.8265, lr_0 = 9.0908e-04
Loss = 7.8257e-02, PNorm = 99.8690, GNorm = 0.7060, lr_0 = 9.0846e-04
Loss = 8.2267e-02, PNorm = 99.9523, GNorm = 0.4456, lr_0 = 9.0783e-04
Loss = 8.2256e-02, PNorm = 100.0323, GNorm = 0.4561, lr_0 = 9.0721e-04
Loss = 7.2860e-02, PNorm = 100.1091, GNorm = 0.3806, lr_0 = 9.0659e-04
Loss = 7.2396e-02, PNorm = 100.1868, GNorm = 0.5238, lr_0 = 9.0597e-04
Loss = 8.5331e-02, PNorm = 100.2577, GNorm = 0.4228, lr_0 = 9.0535e-04
Loss = 8.4261e-02, PNorm = 100.3497, GNorm = 0.4943, lr_0 = 9.0473e-04
Loss = 7.3006e-02, PNorm = 100.4399, GNorm = 0.5872, lr_0 = 9.0411e-04
Loss = 6.9480e-02, PNorm = 100.5223, GNorm = 0.3426, lr_0 = 9.0349e-04
Loss = 8.1911e-02, PNorm = 100.6070, GNorm = 0.5201, lr_0 = 9.0287e-04
Loss = 7.2489e-02, PNorm = 100.6841, GNorm = 0.5313, lr_0 = 9.0225e-04
Loss = 7.5627e-02, PNorm = 100.7684, GNorm = 0.6218, lr_0 = 9.0163e-04
Loss = 9.4387e-02, PNorm = 100.8557, GNorm = 0.7381, lr_0 = 9.0102e-04
Loss = 7.3686e-02, PNorm = 100.9464, GNorm = 0.5159, lr_0 = 9.0040e-04
Loss = 8.2734e-02, PNorm = 101.0233, GNorm = 0.6682, lr_0 = 8.9978e-04
Loss = 8.3605e-02, PNorm = 101.1142, GNorm = 0.5558, lr_0 = 8.9916e-04
Loss = 7.2232e-02, PNorm = 101.1909, GNorm = 0.5516, lr_0 = 8.9855e-04
Loss = 9.1338e-02, PNorm = 101.2813, GNorm = 0.9149, lr_0 = 8.9793e-04
Loss = 8.3000e-02, PNorm = 101.3669, GNorm = 1.2930, lr_0 = 8.9732e-04
Loss = 8.1765e-02, PNorm = 101.4606, GNorm = 0.4604, lr_0 = 8.9670e-04
Loss = 8.1991e-02, PNorm = 101.5436, GNorm = 0.6218, lr_0 = 8.9609e-04
Loss = 8.3947e-02, PNorm = 101.6426, GNorm = 0.3079, lr_0 = 8.9548e-04
Loss = 8.9445e-02, PNorm = 101.7235, GNorm = 0.6502, lr_0 = 8.9486e-04
Loss = 7.2346e-02, PNorm = 101.8166, GNorm = 0.3292, lr_0 = 8.9425e-04
Loss = 8.3298e-02, PNorm = 101.9004, GNorm = 1.1894, lr_0 = 8.9364e-04
Loss = 8.9393e-02, PNorm = 101.9824, GNorm = 0.4071, lr_0 = 8.9302e-04
Loss = 9.2328e-02, PNorm = 102.0718, GNorm = 0.6721, lr_0 = 8.9241e-04
Loss = 8.3065e-02, PNorm = 102.1597, GNorm = 0.5636, lr_0 = 8.9180e-04
Loss = 7.2301e-02, PNorm = 102.2554, GNorm = 0.6184, lr_0 = 8.9119e-04
Loss = 8.7751e-02, PNorm = 102.3322, GNorm = 0.4094, lr_0 = 8.9058e-04
Loss = 8.1203e-02, PNorm = 102.4229, GNorm = 0.9650, lr_0 = 8.8997e-04
Loss = 8.1783e-02, PNorm = 102.5033, GNorm = 0.6159, lr_0 = 8.8936e-04
Loss = 8.7247e-02, PNorm = 102.6079, GNorm = 0.4910, lr_0 = 8.8875e-04
Loss = 8.4817e-02, PNorm = 102.7053, GNorm = 0.6536, lr_0 = 8.8814e-04
Loss = 8.3934e-02, PNorm = 102.8086, GNorm = 0.4150, lr_0 = 8.8753e-04
Loss = 9.2267e-02, PNorm = 102.9051, GNorm = 0.4193, lr_0 = 8.8693e-04
Loss = 8.6912e-02, PNorm = 102.9967, GNorm = 0.8889, lr_0 = 8.8632e-04
Loss = 8.2395e-02, PNorm = 103.0894, GNorm = 0.2934, lr_0 = 8.8571e-04
Loss = 8.7101e-02, PNorm = 103.1886, GNorm = 0.5189, lr_0 = 8.8510e-04
Loss = 7.9968e-02, PNorm = 103.2766, GNorm = 0.4947, lr_0 = 8.8450e-04
Loss = 7.5584e-02, PNorm = 103.3700, GNorm = 0.4448, lr_0 = 8.8389e-04
Loss = 7.1296e-02, PNorm = 103.4573, GNorm = 0.8094, lr_0 = 8.8329e-04
Loss = 9.7025e-02, PNorm = 103.5513, GNorm = 0.4160, lr_0 = 8.8268e-04
Loss = 1.0371e-01, PNorm = 103.6406, GNorm = 0.4428, lr_0 = 8.8208e-04
Loss = 8.4177e-02, PNorm = 103.7374, GNorm = 0.4450, lr_0 = 8.8147e-04
Loss = 8.4566e-02, PNorm = 103.8289, GNorm = 0.4307, lr_0 = 8.8087e-04
Loss = 8.3223e-02, PNorm = 103.9288, GNorm = 0.8448, lr_0 = 8.8026e-04
Loss = 8.1718e-02, PNorm = 104.0318, GNorm = 0.4447, lr_0 = 8.7966e-04
Loss = 7.5933e-02, PNorm = 104.1267, GNorm = 0.5233, lr_0 = 8.7906e-04
Loss = 9.2432e-02, PNorm = 104.2260, GNorm = 0.4258, lr_0 = 8.7846e-04
Loss = 1.0215e-01, PNorm = 104.3190, GNorm = 0.6905, lr_0 = 8.7785e-04
Loss = 9.8334e-02, PNorm = 104.4348, GNorm = 0.4040, lr_0 = 8.7725e-04
Loss = 8.6313e-02, PNorm = 104.5367, GNorm = 0.4841, lr_0 = 8.7665e-04
Loss = 8.3525e-02, PNorm = 104.6332, GNorm = 0.7441, lr_0 = 8.7605e-04
Loss = 9.1605e-02, PNorm = 104.7288, GNorm = 0.6518, lr_0 = 8.7545e-04
Loss = 8.8130e-02, PNorm = 104.8381, GNorm = 0.6034, lr_0 = 8.7485e-04
Loss = 9.4351e-02, PNorm = 104.9312, GNorm = 0.8160, lr_0 = 8.7425e-04
Loss = 8.7524e-02, PNorm = 105.0409, GNorm = 0.7729, lr_0 = 8.7365e-04
Loss = 9.4818e-02, PNorm = 105.1403, GNorm = 0.4086, lr_0 = 8.7306e-04
Loss = 8.6928e-02, PNorm = 105.2407, GNorm = 0.4703, lr_0 = 8.7246e-04
Loss = 1.0020e-01, PNorm = 105.3414, GNorm = 0.5682, lr_0 = 8.7186e-04
Loss = 9.1390e-02, PNorm = 105.4408, GNorm = 0.5546, lr_0 = 8.7126e-04
Loss = 9.1070e-02, PNorm = 105.5396, GNorm = 0.5580, lr_0 = 8.7067e-04
Loss = 9.6487e-02, PNorm = 105.6431, GNorm = 0.5252, lr_0 = 8.7007e-04
Loss = 9.1110e-02, PNorm = 105.7466, GNorm = 0.6581, lr_0 = 8.6947e-04
Loss = 7.5474e-02, PNorm = 105.8468, GNorm = 0.6808, lr_0 = 8.6888e-04
Loss = 8.1506e-02, PNorm = 105.9407, GNorm = 0.4598, lr_0 = 8.6828e-04
Loss = 9.6033e-02, PNorm = 106.0420, GNorm = 0.4501, lr_0 = 8.6769e-04
Loss = 8.9768e-02, PNorm = 106.1442, GNorm = 0.7798, lr_0 = 8.6709e-04
Loss = 7.7221e-02, PNorm = 106.2346, GNorm = 0.6799, lr_0 = 8.6650e-04
Loss = 9.6121e-02, PNorm = 106.3385, GNorm = 0.7938, lr_0 = 8.6590e-04
Loss = 9.2769e-02, PNorm = 106.4480, GNorm = 0.6739, lr_0 = 8.6531e-04
Loss = 9.9731e-02, PNorm = 106.5696, GNorm = 0.5945, lr_0 = 8.6472e-04
Loss = 9.8899e-02, PNorm = 106.6843, GNorm = 0.9274, lr_0 = 8.6413e-04
Loss = 8.1457e-02, PNorm = 106.7993, GNorm = 0.6674, lr_0 = 8.6353e-04
Loss = 8.8702e-02, PNorm = 106.9156, GNorm = 0.4600, lr_0 = 8.6294e-04
Loss = 9.0162e-02, PNorm = 107.0124, GNorm = 0.7010, lr_0 = 8.6235e-04
Loss = 9.3623e-02, PNorm = 107.1163, GNorm = 0.8441, lr_0 = 8.6176e-04
Loss = 8.1235e-02, PNorm = 107.2194, GNorm = 0.4079, lr_0 = 8.6117e-04
Loss = 7.8479e-02, PNorm = 107.3260, GNorm = 0.6017, lr_0 = 8.6058e-04
Loss = 9.5455e-02, PNorm = 107.4315, GNorm = 0.5177, lr_0 = 8.5999e-04
Loss = 9.1473e-02, PNorm = 107.5365, GNorm = 0.7756, lr_0 = 8.5940e-04
Loss = 1.2512e-01, PNorm = 107.6485, GNorm = 0.4731, lr_0 = 8.5881e-04
Loss = 9.7568e-02, PNorm = 107.7645, GNorm = 0.5351, lr_0 = 8.5823e-04
Loss = 9.6650e-02, PNorm = 107.8842, GNorm = 0.4783, lr_0 = 8.5764e-04
Loss = 1.0148e-01, PNorm = 108.0098, GNorm = 0.5754, lr_0 = 8.5705e-04
Loss = 1.0459e-01, PNorm = 108.1199, GNorm = 0.4804, lr_0 = 8.5646e-04
Loss = 9.0334e-02, PNorm = 108.2395, GNorm = 0.9373, lr_0 = 8.5588e-04
Loss = 1.0218e-01, PNorm = 108.3437, GNorm = 0.8548, lr_0 = 8.5529e-04
Loss = 8.6656e-02, PNorm = 108.4541, GNorm = 0.4892, lr_0 = 8.5470e-04
Loss = 7.8332e-02, PNorm = 108.5523, GNorm = 0.5104, lr_0 = 8.5412e-04
Loss = 9.6304e-02, PNorm = 108.6586, GNorm = 0.8578, lr_0 = 8.5353e-04
Loss = 9.9293e-02, PNorm = 108.7724, GNorm = 0.7567, lr_0 = 8.5295e-04
Loss = 7.9932e-02, PNorm = 108.8878, GNorm = 0.8537, lr_0 = 8.5236e-04
Loss = 9.8054e-02, PNorm = 109.0012, GNorm = 0.7016, lr_0 = 8.5178e-04
Loss = 7.5768e-02, PNorm = 109.1044, GNorm = 0.5159, lr_0 = 8.5120e-04
Loss = 1.0051e-01, PNorm = 109.2066, GNorm = 0.4012, lr_0 = 8.5061e-04
Loss = 9.5286e-02, PNorm = 109.3152, GNorm = 0.5447, lr_0 = 8.5003e-04
Loss = 1.0275e-01, PNorm = 109.4256, GNorm = 0.6560, lr_0 = 8.4945e-04
Loss = 9.2282e-02, PNorm = 109.5431, GNorm = 0.7296, lr_0 = 8.4887e-04
Loss = 9.5739e-02, PNorm = 109.6588, GNorm = 0.5411, lr_0 = 8.4828e-04
Validation mae = 0.292887
Epoch 4
Loss = 6.8017e-02, PNorm = 109.7469, GNorm = 0.3591, lr_0 = 8.4770e-04
Loss = 5.8429e-02, PNorm = 109.8292, GNorm = 0.3549, lr_0 = 8.4712e-04
Loss = 6.2858e-02, PNorm = 109.8933, GNorm = 0.4923, lr_0 = 8.4654e-04
Loss = 5.9702e-02, PNorm = 109.9631, GNorm = 0.3273, lr_0 = 8.4596e-04
Loss = 6.5569e-02, PNorm = 110.0298, GNorm = 0.9229, lr_0 = 8.4538e-04
Loss = 4.8744e-02, PNorm = 110.0940, GNorm = 0.3621, lr_0 = 8.4480e-04
Loss = 5.8208e-02, PNorm = 110.1480, GNorm = 0.5712, lr_0 = 8.4423e-04
Loss = 5.9936e-02, PNorm = 110.2147, GNorm = 0.3173, lr_0 = 8.4365e-04
Loss = 4.9845e-02, PNorm = 110.2847, GNorm = 0.2786, lr_0 = 8.4307e-04
Loss = 5.2932e-02, PNorm = 110.3492, GNorm = 0.2974, lr_0 = 8.4249e-04
Loss = 5.1142e-02, PNorm = 110.3997, GNorm = 0.3528, lr_0 = 8.4191e-04
Loss = 5.5599e-02, PNorm = 110.4569, GNorm = 0.4107, lr_0 = 8.4134e-04
Loss = 6.5043e-02, PNorm = 110.5155, GNorm = 0.5991, lr_0 = 8.4076e-04
Loss = 5.3880e-02, PNorm = 110.5843, GNorm = 0.7982, lr_0 = 8.4019e-04
Loss = 5.2443e-02, PNorm = 110.6555, GNorm = 0.6010, lr_0 = 8.3961e-04
Loss = 5.5127e-02, PNorm = 110.7238, GNorm = 0.5053, lr_0 = 8.3903e-04
Loss = 5.7205e-02, PNorm = 110.7887, GNorm = 0.3848, lr_0 = 8.3846e-04
Loss = 4.9398e-02, PNorm = 110.8422, GNorm = 0.2858, lr_0 = 8.3789e-04
Loss = 6.1240e-02, PNorm = 110.9031, GNorm = 0.6580, lr_0 = 8.3731e-04
Loss = 6.2625e-02, PNorm = 110.9732, GNorm = 0.3260, lr_0 = 8.3674e-04
Loss = 5.5587e-02, PNorm = 111.0448, GNorm = 0.7186, lr_0 = 8.3616e-04
Loss = 5.5174e-02, PNorm = 111.1193, GNorm = 0.6084, lr_0 = 8.3559e-04
Loss = 5.3344e-02, PNorm = 111.1755, GNorm = 0.3189, lr_0 = 8.3502e-04
Loss = 5.4881e-02, PNorm = 111.2419, GNorm = 0.5920, lr_0 = 8.3445e-04
Loss = 5.9821e-02, PNorm = 111.3049, GNorm = 0.3728, lr_0 = 8.3388e-04
Loss = 5.5941e-02, PNorm = 111.3784, GNorm = 0.3050, lr_0 = 8.3330e-04
Loss = 5.3768e-02, PNorm = 111.4392, GNorm = 0.3429, lr_0 = 8.3273e-04
Loss = 5.6768e-02, PNorm = 111.5068, GNorm = 0.4976, lr_0 = 8.3216e-04
Loss = 5.0929e-02, PNorm = 111.5720, GNorm = 0.5886, lr_0 = 8.3159e-04
Loss = 5.1111e-02, PNorm = 111.6367, GNorm = 0.4516, lr_0 = 8.3102e-04
Loss = 6.2505e-02, PNorm = 111.7017, GNorm = 0.6974, lr_0 = 8.3045e-04
Loss = 5.3975e-02, PNorm = 111.7688, GNorm = 0.3920, lr_0 = 8.2988e-04
Loss = 4.9747e-02, PNorm = 111.8349, GNorm = 0.4936, lr_0 = 8.2932e-04
Loss = 5.5036e-02, PNorm = 111.8950, GNorm = 0.9574, lr_0 = 8.2875e-04
Loss = 4.6404e-02, PNorm = 111.9581, GNorm = 0.3919, lr_0 = 8.2818e-04
Loss = 6.0884e-02, PNorm = 112.0302, GNorm = 2.2339, lr_0 = 8.2761e-04
Loss = 5.3523e-02, PNorm = 112.0934, GNorm = 0.5501, lr_0 = 8.2705e-04
Loss = 5.2374e-02, PNorm = 112.1820, GNorm = 0.6338, lr_0 = 8.2648e-04
Loss = 5.0832e-02, PNorm = 112.2531, GNorm = 0.4136, lr_0 = 8.2591e-04
Loss = 5.1890e-02, PNorm = 112.3250, GNorm = 0.5132, lr_0 = 8.2535e-04
Loss = 5.5241e-02, PNorm = 112.4067, GNorm = 0.5507, lr_0 = 8.2478e-04
Loss = 5.2987e-02, PNorm = 112.4853, GNorm = 0.5242, lr_0 = 8.2422e-04
Loss = 5.5436e-02, PNorm = 112.5707, GNorm = 0.3430, lr_0 = 8.2365e-04
Loss = 5.9116e-02, PNorm = 112.6467, GNorm = 0.7803, lr_0 = 8.2309e-04
Loss = 5.2737e-02, PNorm = 112.7219, GNorm = 0.5942, lr_0 = 8.2252e-04
Loss = 5.0420e-02, PNorm = 112.8005, GNorm = 0.7716, lr_0 = 8.2196e-04
Loss = 6.9417e-02, PNorm = 112.8706, GNorm = 0.4740, lr_0 = 8.2140e-04
Loss = 5.6404e-02, PNorm = 112.9484, GNorm = 0.3340, lr_0 = 8.2084e-04
Loss = 6.2792e-02, PNorm = 113.0323, GNorm = 0.6662, lr_0 = 8.2027e-04
Loss = 5.7332e-02, PNorm = 113.1162, GNorm = 0.6238, lr_0 = 8.1971e-04
Loss = 5.3651e-02, PNorm = 113.1810, GNorm = 0.2989, lr_0 = 8.1915e-04
Loss = 5.4502e-02, PNorm = 113.2560, GNorm = 0.4812, lr_0 = 8.1859e-04
Loss = 5.7268e-02, PNorm = 113.3266, GNorm = 0.4704, lr_0 = 8.1803e-04
Loss = 6.3023e-02, PNorm = 113.3983, GNorm = 0.5795, lr_0 = 8.1747e-04
Loss = 6.0495e-02, PNorm = 113.4717, GNorm = 0.5891, lr_0 = 8.1691e-04
Loss = 6.7581e-02, PNorm = 113.5550, GNorm = 0.4313, lr_0 = 8.1635e-04
Loss = 6.5476e-02, PNorm = 113.6398, GNorm = 0.4014, lr_0 = 8.1579e-04
Loss = 4.7223e-02, PNorm = 113.7334, GNorm = 0.5105, lr_0 = 8.1523e-04
Loss = 6.0016e-02, PNorm = 113.8061, GNorm = 0.5937, lr_0 = 8.1467e-04
Loss = 6.0270e-02, PNorm = 113.8828, GNorm = 0.5472, lr_0 = 8.1411e-04
Loss = 6.0943e-02, PNorm = 113.9526, GNorm = 0.5942, lr_0 = 8.1355e-04
Loss = 5.7004e-02, PNorm = 114.0345, GNorm = 0.5857, lr_0 = 8.1300e-04
Loss = 6.0340e-02, PNorm = 114.1090, GNorm = 0.4442, lr_0 = 8.1244e-04
Loss = 5.8210e-02, PNorm = 114.1922, GNorm = 0.4595, lr_0 = 8.1188e-04
Loss = 4.9776e-02, PNorm = 114.2744, GNorm = 0.3956, lr_0 = 8.1133e-04
Loss = 5.7130e-02, PNorm = 114.3535, GNorm = 0.5054, lr_0 = 8.1077e-04
Loss = 5.0089e-02, PNorm = 114.4354, GNorm = 0.3803, lr_0 = 8.1022e-04
Loss = 5.3097e-02, PNorm = 114.5162, GNorm = 0.4268, lr_0 = 8.0966e-04
Loss = 5.3558e-02, PNorm = 114.5966, GNorm = 0.2757, lr_0 = 8.0911e-04
Loss = 6.2356e-02, PNorm = 114.6717, GNorm = 0.5176, lr_0 = 8.0855e-04
Loss = 6.3262e-02, PNorm = 114.7617, GNorm = 0.4392, lr_0 = 8.0800e-04
Loss = 5.7953e-02, PNorm = 114.8441, GNorm = 0.4269, lr_0 = 8.0745e-04
Loss = 6.4383e-02, PNorm = 114.9340, GNorm = 0.3638, lr_0 = 8.0689e-04
Loss = 5.9141e-02, PNorm = 115.0174, GNorm = 0.2853, lr_0 = 8.0634e-04
Loss = 5.2577e-02, PNorm = 115.1049, GNorm = 0.3039, lr_0 = 8.0579e-04
Loss = 5.8283e-02, PNorm = 115.1875, GNorm = 0.3254, lr_0 = 8.0523e-04
Loss = 7.1154e-02, PNorm = 115.2763, GNorm = 0.5246, lr_0 = 8.0468e-04
Loss = 6.2147e-02, PNorm = 115.3670, GNorm = 0.6556, lr_0 = 8.0413e-04
Loss = 7.1079e-02, PNorm = 115.4533, GNorm = 0.4268, lr_0 = 8.0358e-04
Loss = 5.8164e-02, PNorm = 115.5349, GNorm = 1.0170, lr_0 = 8.0303e-04
Loss = 5.1776e-02, PNorm = 115.6234, GNorm = 0.3499, lr_0 = 8.0248e-04
Loss = 7.2363e-02, PNorm = 115.7104, GNorm = 0.3869, lr_0 = 8.0193e-04
Loss = 6.1143e-02, PNorm = 115.8007, GNorm = 0.3405, lr_0 = 8.0138e-04
Loss = 6.1663e-02, PNorm = 115.8907, GNorm = 0.7904, lr_0 = 8.0083e-04
Loss = 7.4606e-02, PNorm = 115.9825, GNorm = 0.3373, lr_0 = 8.0028e-04
Loss = 5.8801e-02, PNorm = 116.0755, GNorm = 0.4962, lr_0 = 7.9974e-04
Loss = 5.7165e-02, PNorm = 116.1631, GNorm = 0.4647, lr_0 = 7.9919e-04
Loss = 6.5486e-02, PNorm = 116.2542, GNorm = 1.1454, lr_0 = 7.9864e-04
Loss = 5.9122e-02, PNorm = 116.3345, GNorm = 0.5775, lr_0 = 7.9809e-04
Loss = 6.7695e-02, PNorm = 116.4273, GNorm = 0.9515, lr_0 = 7.9755e-04
Loss = 6.6236e-02, PNorm = 116.5154, GNorm = 0.6082, lr_0 = 7.9700e-04
Loss = 6.2234e-02, PNorm = 116.6173, GNorm = 0.5438, lr_0 = 7.9645e-04
Loss = 5.5506e-02, PNorm = 116.7037, GNorm = 0.4391, lr_0 = 7.9591e-04
Loss = 6.8728e-02, PNorm = 116.8016, GNorm = 0.5199, lr_0 = 7.9536e-04
Loss = 7.1414e-02, PNorm = 116.8986, GNorm = 0.5921, lr_0 = 7.9482e-04
Loss = 6.5751e-02, PNorm = 116.9864, GNorm = 0.5518, lr_0 = 7.9427e-04
Loss = 6.8883e-02, PNorm = 117.0810, GNorm = 0.4782, lr_0 = 7.9373e-04
Loss = 6.9778e-02, PNorm = 117.1796, GNorm = 0.5649, lr_0 = 7.9319e-04
Loss = 6.1719e-02, PNorm = 117.2704, GNorm = 0.3943, lr_0 = 7.9264e-04
Loss = 6.8284e-02, PNorm = 117.3500, GNorm = 0.4261, lr_0 = 7.9210e-04
Loss = 6.2492e-02, PNorm = 117.4525, GNorm = 0.7951, lr_0 = 7.9156e-04
Loss = 6.7521e-02, PNorm = 117.5430, GNorm = 0.4221, lr_0 = 7.9101e-04
Loss = 6.7145e-02, PNorm = 117.6403, GNorm = 0.6329, lr_0 = 7.9047e-04
Loss = 5.6685e-02, PNorm = 117.7384, GNorm = 0.2618, lr_0 = 7.8993e-04
Loss = 7.1853e-02, PNorm = 117.8400, GNorm = 0.8197, lr_0 = 7.8939e-04
Loss = 6.6128e-02, PNorm = 117.9466, GNorm = 0.5674, lr_0 = 7.8885e-04
Loss = 7.5547e-02, PNorm = 118.0483, GNorm = 0.4211, lr_0 = 7.8831e-04
Loss = 6.3003e-02, PNorm = 118.1468, GNorm = 1.0354, lr_0 = 7.8777e-04
Loss = 6.3669e-02, PNorm = 118.2423, GNorm = 0.4288, lr_0 = 7.8723e-04
Loss = 6.8631e-02, PNorm = 118.3404, GNorm = 0.4859, lr_0 = 7.8669e-04
Loss = 6.7341e-02, PNorm = 118.4407, GNorm = 0.8366, lr_0 = 7.8615e-04
Loss = 7.3083e-02, PNorm = 118.5505, GNorm = 1.1204, lr_0 = 7.8561e-04
Loss = 6.7841e-02, PNorm = 118.6623, GNorm = 0.3789, lr_0 = 7.8507e-04
Loss = 6.1920e-02, PNorm = 118.7702, GNorm = 0.7372, lr_0 = 7.8454e-04
Loss = 6.0175e-02, PNorm = 118.8656, GNorm = 0.4838, lr_0 = 7.8400e-04
Loss = 7.5601e-02, PNorm = 118.9653, GNorm = 0.9921, lr_0 = 7.8346e-04
Loss = 7.1437e-02, PNorm = 119.0618, GNorm = 0.5861, lr_0 = 7.8293e-04
Loss = 6.3889e-02, PNorm = 119.1506, GNorm = 0.4564, lr_0 = 7.8239e-04
Loss = 6.8379e-02, PNorm = 119.2434, GNorm = 0.7376, lr_0 = 7.8185e-04
Loss = 5.9623e-02, PNorm = 119.3360, GNorm = 0.7227, lr_0 = 7.8132e-04
Validation mae = 0.290835
Epoch 5
Loss = 5.3981e-02, PNorm = 119.4198, GNorm = 0.4060, lr_0 = 7.8078e-04
Loss = 4.0247e-02, PNorm = 119.4920, GNorm = 0.3556, lr_0 = 7.8025e-04
Loss = 4.0712e-02, PNorm = 119.5581, GNorm = 0.3900, lr_0 = 7.7971e-04
Loss = 3.8935e-02, PNorm = 119.6135, GNorm = 0.4938, lr_0 = 7.7918e-04
Loss = 4.1867e-02, PNorm = 119.6663, GNorm = 0.6728, lr_0 = 7.7864e-04
Loss = 4.1491e-02, PNorm = 119.7177, GNorm = 0.3142, lr_0 = 7.7811e-04
Loss = 5.1617e-02, PNorm = 119.7678, GNorm = 0.3475, lr_0 = 7.7758e-04
Loss = 4.3165e-02, PNorm = 119.8261, GNorm = 0.2515, lr_0 = 7.7705e-04
Loss = 4.5524e-02, PNorm = 119.8899, GNorm = 0.4341, lr_0 = 7.7651e-04
Loss = 4.5628e-02, PNorm = 119.9568, GNorm = 0.5008, lr_0 = 7.7598e-04
Loss = 5.0589e-02, PNorm = 120.0285, GNorm = 0.5047, lr_0 = 7.7545e-04
Loss = 4.2735e-02, PNorm = 120.0878, GNorm = 0.4997, lr_0 = 7.7492e-04
Loss = 4.2180e-02, PNorm = 120.1468, GNorm = 0.2867, lr_0 = 7.7439e-04
Loss = 4.3969e-02, PNorm = 120.2058, GNorm = 0.4332, lr_0 = 7.7386e-04
Loss = 3.9732e-02, PNorm = 120.2729, GNorm = 0.6017, lr_0 = 7.7333e-04
Loss = 4.9246e-02, PNorm = 120.3338, GNorm = 0.8005, lr_0 = 7.7280e-04
Loss = 3.7621e-02, PNorm = 120.3978, GNorm = 0.4113, lr_0 = 7.7227e-04
Loss = 4.3490e-02, PNorm = 120.4587, GNorm = 0.6179, lr_0 = 7.7174e-04
Loss = 4.0296e-02, PNorm = 120.5263, GNorm = 0.3660, lr_0 = 7.7121e-04
Loss = 4.4203e-02, PNorm = 120.5897, GNorm = 0.2531, lr_0 = 7.7068e-04
Loss = 4.3710e-02, PNorm = 120.6581, GNorm = 0.3585, lr_0 = 7.7015e-04
Loss = 3.8754e-02, PNorm = 120.7199, GNorm = 0.6092, lr_0 = 7.6963e-04
Loss = 4.9617e-02, PNorm = 120.7889, GNorm = 0.6881, lr_0 = 7.6910e-04
Loss = 3.9413e-02, PNorm = 120.8567, GNorm = 0.2141, lr_0 = 7.6857e-04
Loss = 3.6433e-02, PNorm = 120.9178, GNorm = 0.4000, lr_0 = 7.6805e-04
Loss = 3.9532e-02, PNorm = 120.9807, GNorm = 0.2965, lr_0 = 7.6752e-04
Loss = 4.4084e-02, PNorm = 121.0427, GNorm = 0.7833, lr_0 = 7.6699e-04
Loss = 4.2152e-02, PNorm = 121.1059, GNorm = 0.3826, lr_0 = 7.6647e-04
Loss = 4.0386e-02, PNorm = 121.1683, GNorm = 0.5931, lr_0 = 7.6594e-04
Loss = 3.8614e-02, PNorm = 121.2301, GNorm = 0.2544, lr_0 = 7.6542e-04
Loss = 4.1027e-02, PNorm = 121.2906, GNorm = 0.2908, lr_0 = 7.6489e-04
Loss = 3.8520e-02, PNorm = 121.3533, GNorm = 0.2841, lr_0 = 7.6437e-04
Loss = 4.3335e-02, PNorm = 121.4133, GNorm = 0.3559, lr_0 = 7.6385e-04
Loss = 4.1433e-02, PNorm = 121.4771, GNorm = 0.2947, lr_0 = 7.6332e-04
Loss = 4.0787e-02, PNorm = 121.5463, GNorm = 0.3184, lr_0 = 7.6280e-04
Loss = 3.9368e-02, PNorm = 121.6140, GNorm = 0.7450, lr_0 = 7.6228e-04
Loss = 4.5368e-02, PNorm = 121.6881, GNorm = 0.5714, lr_0 = 7.6176e-04
Loss = 4.1830e-02, PNorm = 121.7523, GNorm = 0.3777, lr_0 = 7.6123e-04
Loss = 4.1254e-02, PNorm = 121.8207, GNorm = 0.7096, lr_0 = 7.6071e-04
Loss = 4.5734e-02, PNorm = 121.8832, GNorm = 0.3173, lr_0 = 7.6019e-04
Loss = 4.0040e-02, PNorm = 121.9497, GNorm = 0.6659, lr_0 = 7.5967e-04
Loss = 4.3506e-02, PNorm = 122.0103, GNorm = 0.4061, lr_0 = 7.5915e-04
Loss = 3.6024e-02, PNorm = 122.0739, GNorm = 0.3016, lr_0 = 7.5863e-04
Loss = 3.5386e-02, PNorm = 122.1307, GNorm = 0.5583, lr_0 = 7.5811e-04
Loss = 4.5385e-02, PNorm = 122.1806, GNorm = 0.6442, lr_0 = 7.5759e-04
Loss = 4.1905e-02, PNorm = 122.2402, GNorm = 0.4977, lr_0 = 7.5707e-04
Loss = 4.1170e-02, PNorm = 122.3049, GNorm = 0.6825, lr_0 = 7.5655e-04
Loss = 4.1618e-02, PNorm = 122.3697, GNorm = 0.3387, lr_0 = 7.5603e-04
Loss = 4.1553e-02, PNorm = 122.4360, GNorm = 0.4635, lr_0 = 7.5552e-04
Loss = 3.7775e-02, PNorm = 122.4937, GNorm = 0.3165, lr_0 = 7.5500e-04
Loss = 4.1947e-02, PNorm = 122.5583, GNorm = 0.6464, lr_0 = 7.5448e-04
Loss = 4.2370e-02, PNorm = 122.6266, GNorm = 0.4271, lr_0 = 7.5397e-04
Loss = 3.7080e-02, PNorm = 122.6969, GNorm = 0.3117, lr_0 = 7.5345e-04
Loss = 5.0368e-02, PNorm = 122.7585, GNorm = 0.3084, lr_0 = 7.5293e-04
Loss = 4.2961e-02, PNorm = 122.8304, GNorm = 0.2922, lr_0 = 7.5242e-04
Loss = 4.4804e-02, PNorm = 122.8981, GNorm = 0.4399, lr_0 = 7.5190e-04
Loss = 4.6086e-02, PNorm = 122.9757, GNorm = 0.3926, lr_0 = 7.5139e-04
Loss = 4.2208e-02, PNorm = 123.0507, GNorm = 0.4042, lr_0 = 7.5087e-04
Loss = 4.7535e-02, PNorm = 123.1205, GNorm = 1.0675, lr_0 = 7.5036e-04
Loss = 5.5365e-02, PNorm = 123.2050, GNorm = 0.2953, lr_0 = 7.4984e-04
Loss = 5.0755e-02, PNorm = 123.2853, GNorm = 0.6817, lr_0 = 7.4933e-04
Loss = 5.0784e-02, PNorm = 123.3715, GNorm = 0.2771, lr_0 = 7.4882e-04
Loss = 4.8217e-02, PNorm = 123.4460, GNorm = 1.0231, lr_0 = 7.4830e-04
Loss = 4.0310e-02, PNorm = 123.5191, GNorm = 0.4394, lr_0 = 7.4779e-04
Loss = 4.5278e-02, PNorm = 123.5879, GNorm = 0.3918, lr_0 = 7.4728e-04
Loss = 3.9566e-02, PNorm = 123.6621, GNorm = 0.5799, lr_0 = 7.4677e-04
Loss = 4.0183e-02, PNorm = 123.7264, GNorm = 0.4258, lr_0 = 7.4625e-04
Loss = 4.2127e-02, PNorm = 123.7951, GNorm = 0.4747, lr_0 = 7.4574e-04
Loss = 4.3701e-02, PNorm = 123.8684, GNorm = 0.3340, lr_0 = 7.4523e-04
Loss = 4.2547e-02, PNorm = 123.9409, GNorm = 0.2913, lr_0 = 7.4472e-04
Loss = 4.8026e-02, PNorm = 124.0107, GNorm = 0.4706, lr_0 = 7.4421e-04
Loss = 4.5277e-02, PNorm = 124.0845, GNorm = 0.3962, lr_0 = 7.4370e-04
Loss = 4.3372e-02, PNorm = 124.1567, GNorm = 0.6548, lr_0 = 7.4319e-04
Loss = 4.6133e-02, PNorm = 124.2338, GNorm = 0.4222, lr_0 = 7.4268e-04
Loss = 4.1839e-02, PNorm = 124.3135, GNorm = 0.4064, lr_0 = 7.4217e-04
Loss = 4.6338e-02, PNorm = 124.3817, GNorm = 0.5506, lr_0 = 7.4167e-04
Loss = 4.1912e-02, PNorm = 124.4565, GNorm = 0.5200, lr_0 = 7.4116e-04
Loss = 4.2815e-02, PNorm = 124.5343, GNorm = 0.2667, lr_0 = 7.4065e-04
Loss = 4.3672e-02, PNorm = 124.6060, GNorm = 0.3449, lr_0 = 7.4014e-04
Loss = 4.9286e-02, PNorm = 124.6855, GNorm = 0.3373, lr_0 = 7.3964e-04
Loss = 4.2469e-02, PNorm = 124.7621, GNorm = 0.3037, lr_0 = 7.3913e-04
Loss = 4.8988e-02, PNorm = 124.8425, GNorm = 0.2926, lr_0 = 7.3862e-04
Loss = 4.8258e-02, PNorm = 124.9134, GNorm = 0.8683, lr_0 = 7.3812e-04
Loss = 4.6295e-02, PNorm = 124.9804, GNorm = 0.3156, lr_0 = 7.3761e-04
Loss = 4.9799e-02, PNorm = 125.0568, GNorm = 0.4788, lr_0 = 7.3711e-04
Loss = 4.6498e-02, PNorm = 125.1365, GNorm = 0.5179, lr_0 = 7.3660e-04
Loss = 4.8118e-02, PNorm = 125.2134, GNorm = 0.3459, lr_0 = 7.3610e-04
Loss = 4.2749e-02, PNorm = 125.2905, GNorm = 0.5421, lr_0 = 7.3559e-04
Loss = 4.8946e-02, PNorm = 125.3597, GNorm = 0.3308, lr_0 = 7.3509e-04
Loss = 4.1762e-02, PNorm = 125.4340, GNorm = 0.5870, lr_0 = 7.3458e-04
Loss = 4.9083e-02, PNorm = 125.5115, GNorm = 0.6737, lr_0 = 7.3408e-04
Loss = 4.5243e-02, PNorm = 125.5956, GNorm = 0.2972, lr_0 = 7.3358e-04
Loss = 5.0504e-02, PNorm = 125.6803, GNorm = 0.5633, lr_0 = 7.3308e-04
Loss = 4.8943e-02, PNorm = 125.7636, GNorm = 0.9782, lr_0 = 7.3257e-04
Loss = 4.4819e-02, PNorm = 125.8393, GNorm = 0.8120, lr_0 = 7.3207e-04
Loss = 3.8833e-02, PNorm = 125.9165, GNorm = 0.4561, lr_0 = 7.3157e-04
Loss = 4.6592e-02, PNorm = 125.9966, GNorm = 0.4651, lr_0 = 7.3107e-04
Loss = 5.0000e-02, PNorm = 126.0809, GNorm = 0.4109, lr_0 = 7.3057e-04
Loss = 4.4190e-02, PNorm = 126.1649, GNorm = 0.3909, lr_0 = 7.3007e-04
Loss = 3.8507e-02, PNorm = 126.2402, GNorm = 0.2608, lr_0 = 7.2957e-04
Loss = 5.0706e-02, PNorm = 126.3142, GNorm = 0.3646, lr_0 = 7.2907e-04
Loss = 4.7042e-02, PNorm = 126.3951, GNorm = 0.9440, lr_0 = 7.2857e-04
Loss = 4.1663e-02, PNorm = 126.4768, GNorm = 0.3942, lr_0 = 7.2807e-04
Loss = 4.6583e-02, PNorm = 126.5534, GNorm = 0.6519, lr_0 = 7.2757e-04
Loss = 4.3319e-02, PNorm = 126.6400, GNorm = 0.4847, lr_0 = 7.2707e-04
Loss = 4.6918e-02, PNorm = 126.7277, GNorm = 0.5935, lr_0 = 7.2657e-04
Loss = 5.5366e-02, PNorm = 126.8101, GNorm = 0.9243, lr_0 = 7.2608e-04
Loss = 4.8322e-02, PNorm = 126.8937, GNorm = 0.6558, lr_0 = 7.2558e-04
Loss = 4.9695e-02, PNorm = 126.9863, GNorm = 0.7552, lr_0 = 7.2508e-04
Loss = 5.0780e-02, PNorm = 127.0723, GNorm = 0.4358, lr_0 = 7.2458e-04
Loss = 4.2928e-02, PNorm = 127.1545, GNorm = 0.8578, lr_0 = 7.2409e-04
Loss = 5.3270e-02, PNorm = 127.2324, GNorm = 0.2320, lr_0 = 7.2359e-04
Loss = 5.2144e-02, PNorm = 127.3150, GNorm = 0.4651, lr_0 = 7.2310e-04
Loss = 4.2162e-02, PNorm = 127.3959, GNorm = 0.5925, lr_0 = 7.2260e-04
Loss = 4.7391e-02, PNorm = 127.4788, GNorm = 0.5853, lr_0 = 7.2211e-04
Loss = 4.7915e-02, PNorm = 127.5620, GNorm = 0.8888, lr_0 = 7.2161e-04
Loss = 3.8064e-02, PNorm = 127.6416, GNorm = 0.4486, lr_0 = 7.2112e-04
Loss = 4.9969e-02, PNorm = 127.7185, GNorm = 0.9524, lr_0 = 7.2062e-04
Loss = 4.9484e-02, PNorm = 127.8005, GNorm = 0.5104, lr_0 = 7.2013e-04
Loss = 4.4927e-02, PNorm = 127.8950, GNorm = 0.3798, lr_0 = 7.1964e-04
Validation mae = 0.288614
Epoch 6
Loss = 3.8010e-02, PNorm = 127.9725, GNorm = 0.2957, lr_0 = 7.1914e-04
Loss = 3.8040e-02, PNorm = 128.0360, GNorm = 0.8427, lr_0 = 7.1865e-04
Loss = 3.8810e-02, PNorm = 128.0972, GNorm = 0.2633, lr_0 = 7.1816e-04
Loss = 3.2943e-02, PNorm = 128.1565, GNorm = 0.4588, lr_0 = 7.1767e-04
Loss = 4.0043e-02, PNorm = 128.2151, GNorm = 0.2334, lr_0 = 7.1717e-04
Loss = 3.5165e-02, PNorm = 128.2744, GNorm = 0.7995, lr_0 = 7.1668e-04
Loss = 3.9304e-02, PNorm = 128.3282, GNorm = 0.3918, lr_0 = 7.1619e-04
Loss = 3.4465e-02, PNorm = 128.3877, GNorm = 0.4770, lr_0 = 7.1570e-04
Loss = 3.2140e-02, PNorm = 128.4405, GNorm = 0.4817, lr_0 = 7.1521e-04
Loss = 3.2465e-02, PNorm = 128.4884, GNorm = 0.5090, lr_0 = 7.1472e-04
Loss = 2.7939e-02, PNorm = 128.5413, GNorm = 0.6318, lr_0 = 7.1423e-04
Loss = 3.6841e-02, PNorm = 128.6031, GNorm = 0.3304, lr_0 = 7.1374e-04
Loss = 3.4137e-02, PNorm = 128.6579, GNorm = 0.4069, lr_0 = 7.1325e-04
Loss = 3.6754e-02, PNorm = 128.7262, GNorm = 0.8367, lr_0 = 7.1277e-04
Loss = 3.4347e-02, PNorm = 128.7864, GNorm = 0.2964, lr_0 = 7.1228e-04
Loss = 2.6870e-02, PNorm = 128.8430, GNorm = 0.3689, lr_0 = 7.1179e-04
Loss = 2.7840e-02, PNorm = 128.8981, GNorm = 0.1973, lr_0 = 7.1130e-04
Loss = 2.9019e-02, PNorm = 128.9478, GNorm = 0.4508, lr_0 = 7.1081e-04
Loss = 3.2814e-02, PNorm = 128.9900, GNorm = 0.4479, lr_0 = 7.1033e-04
Loss = 3.0738e-02, PNorm = 129.0332, GNorm = 0.6668, lr_0 = 7.0984e-04
Loss = 3.7910e-02, PNorm = 129.0853, GNorm = 0.8687, lr_0 = 7.0935e-04
Loss = 3.0912e-02, PNorm = 129.1437, GNorm = 0.3878, lr_0 = 7.0887e-04
Loss = 3.7863e-02, PNorm = 129.2007, GNorm = 0.5663, lr_0 = 7.0838e-04
Loss = 2.8584e-02, PNorm = 129.2617, GNorm = 0.5501, lr_0 = 7.0790e-04
Loss = 4.2998e-02, PNorm = 129.3127, GNorm = 0.5385, lr_0 = 7.0741e-04
Loss = 3.2819e-02, PNorm = 129.3639, GNorm = 0.4744, lr_0 = 7.0693e-04
Loss = 2.9308e-02, PNorm = 129.4176, GNorm = 0.4756, lr_0 = 7.0644e-04
Loss = 3.1025e-02, PNorm = 129.4724, GNorm = 0.5021, lr_0 = 7.0596e-04
Loss = 3.0302e-02, PNorm = 129.5225, GNorm = 0.5694, lr_0 = 7.0548e-04
Loss = 3.0646e-02, PNorm = 129.5751, GNorm = 0.4289, lr_0 = 7.0499e-04
Loss = 2.7554e-02, PNorm = 129.6232, GNorm = 0.6641, lr_0 = 7.0451e-04
Loss = 2.8422e-02, PNorm = 129.6765, GNorm = 0.3846, lr_0 = 7.0403e-04
Loss = 3.7371e-02, PNorm = 129.7286, GNorm = 0.3443, lr_0 = 7.0354e-04
Loss = 3.1120e-02, PNorm = 129.7852, GNorm = 0.4572, lr_0 = 7.0306e-04
Loss = 3.3864e-02, PNorm = 129.8343, GNorm = 0.3852, lr_0 = 7.0258e-04
Loss = 3.5357e-02, PNorm = 129.8921, GNorm = 0.4699, lr_0 = 7.0210e-04
Loss = 2.9887e-02, PNorm = 129.9500, GNorm = 0.4844, lr_0 = 7.0162e-04
Loss = 3.0359e-02, PNorm = 130.0024, GNorm = 0.2895, lr_0 = 7.0114e-04
Loss = 2.6911e-02, PNorm = 130.0507, GNorm = 0.3679, lr_0 = 7.0066e-04
Loss = 2.8636e-02, PNorm = 130.0983, GNorm = 0.3941, lr_0 = 7.0018e-04
Loss = 3.4113e-02, PNorm = 130.1451, GNorm = 0.4082, lr_0 = 6.9970e-04
Loss = 3.5252e-02, PNorm = 130.2047, GNorm = 0.8194, lr_0 = 6.9922e-04
Loss = 3.0015e-02, PNorm = 130.2651, GNorm = 0.2231, lr_0 = 6.9874e-04
Loss = 3.0040e-02, PNorm = 130.3176, GNorm = 0.6241, lr_0 = 6.9826e-04
Loss = 3.1992e-02, PNorm = 130.3685, GNorm = 0.2476, lr_0 = 6.9778e-04
Loss = 3.2109e-02, PNorm = 130.4199, GNorm = 0.3057, lr_0 = 6.9730e-04
Loss = 2.9719e-02, PNorm = 130.4740, GNorm = 1.1139, lr_0 = 6.9683e-04
Loss = 3.3371e-02, PNorm = 130.5267, GNorm = 0.3840, lr_0 = 6.9635e-04
Loss = 3.5047e-02, PNorm = 130.5847, GNorm = 0.2452, lr_0 = 6.9587e-04
Loss = 3.3319e-02, PNorm = 130.6516, GNorm = 0.3421, lr_0 = 6.9540e-04
Loss = 3.4399e-02, PNorm = 130.7133, GNorm = 0.3767, lr_0 = 6.9492e-04
Loss = 3.0522e-02, PNorm = 130.7730, GNorm = 0.2650, lr_0 = 6.9444e-04
Loss = 3.1164e-02, PNorm = 130.8269, GNorm = 0.5773, lr_0 = 6.9397e-04
Loss = 3.2876e-02, PNorm = 130.8833, GNorm = 0.4102, lr_0 = 6.9349e-04
Loss = 3.5713e-02, PNorm = 130.9400, GNorm = 0.3174, lr_0 = 6.9302e-04
Loss = 3.4343e-02, PNorm = 130.9967, GNorm = 0.3392, lr_0 = 6.9254e-04
Loss = 3.6175e-02, PNorm = 131.0638, GNorm = 0.7826, lr_0 = 6.9207e-04
Loss = 3.7377e-02, PNorm = 131.1232, GNorm = 0.3039, lr_0 = 6.9159e-04
Loss = 3.5219e-02, PNorm = 131.1951, GNorm = 0.3622, lr_0 = 6.9112e-04
Loss = 2.8894e-02, PNorm = 131.2664, GNorm = 0.6493, lr_0 = 6.9065e-04
Loss = 3.3068e-02, PNorm = 131.3305, GNorm = 0.2161, lr_0 = 6.9017e-04
Loss = 3.0926e-02, PNorm = 131.3930, GNorm = 0.9596, lr_0 = 6.8970e-04
Loss = 3.5734e-02, PNorm = 131.4544, GNorm = 0.4066, lr_0 = 6.8923e-04
Loss = 3.3322e-02, PNorm = 131.5177, GNorm = 0.6397, lr_0 = 6.8876e-04
Loss = 3.8618e-02, PNorm = 131.5840, GNorm = 0.5473, lr_0 = 6.8828e-04
Loss = 3.2933e-02, PNorm = 131.6486, GNorm = 0.6384, lr_0 = 6.8781e-04
Loss = 3.4745e-02, PNorm = 131.7068, GNorm = 0.3327, lr_0 = 6.8734e-04
Loss = 2.8186e-02, PNorm = 131.7661, GNorm = 0.2014, lr_0 = 6.8687e-04
Loss = 3.1262e-02, PNorm = 131.8252, GNorm = 0.4134, lr_0 = 6.8640e-04
Loss = 3.2111e-02, PNorm = 131.8835, GNorm = 1.0114, lr_0 = 6.8593e-04
Loss = 3.8740e-02, PNorm = 131.9469, GNorm = 0.6005, lr_0 = 6.8546e-04
Loss = 3.6577e-02, PNorm = 132.0162, GNorm = 0.5560, lr_0 = 6.8499e-04
Loss = 3.2215e-02, PNorm = 132.0785, GNorm = 0.3206, lr_0 = 6.8452e-04
Loss = 3.8250e-02, PNorm = 132.1357, GNorm = 0.5614, lr_0 = 6.8405e-04
Loss = 3.2086e-02, PNorm = 132.1927, GNorm = 0.2503, lr_0 = 6.8358e-04
Loss = 3.0863e-02, PNorm = 132.2454, GNorm = 0.3967, lr_0 = 6.8312e-04
Loss = 2.9838e-02, PNorm = 132.3055, GNorm = 0.2663, lr_0 = 6.8265e-04
Loss = 3.4467e-02, PNorm = 132.3683, GNorm = 0.8111, lr_0 = 6.8218e-04
Loss = 3.1276e-02, PNorm = 132.4414, GNorm = 0.3803, lr_0 = 6.8171e-04
Loss = 3.7132e-02, PNorm = 132.5048, GNorm = 0.3928, lr_0 = 6.8125e-04
Loss = 4.1565e-02, PNorm = 132.5771, GNorm = 0.6701, lr_0 = 6.8078e-04
Loss = 3.9635e-02, PNorm = 132.6523, GNorm = 0.4583, lr_0 = 6.8031e-04
Loss = 4.3212e-02, PNorm = 132.7300, GNorm = 0.7687, lr_0 = 6.7985e-04
Loss = 3.3336e-02, PNorm = 132.7985, GNorm = 0.1934, lr_0 = 6.7938e-04
Loss = 3.7013e-02, PNorm = 132.8698, GNorm = 0.5804, lr_0 = 6.7892e-04
Loss = 3.6497e-02, PNorm = 132.9326, GNorm = 0.4759, lr_0 = 6.7845e-04
Loss = 3.3634e-02, PNorm = 133.0025, GNorm = 0.6183, lr_0 = 6.7799e-04
Loss = 3.8579e-02, PNorm = 133.0653, GNorm = 0.5782, lr_0 = 6.7752e-04
Loss = 3.2646e-02, PNorm = 133.1310, GNorm = 0.5736, lr_0 = 6.7706e-04
Loss = 3.5212e-02, PNorm = 133.2010, GNorm = 0.3005, lr_0 = 6.7659e-04
Loss = 3.7099e-02, PNorm = 133.2724, GNorm = 0.2535, lr_0 = 6.7613e-04
Loss = 3.2071e-02, PNorm = 133.3475, GNorm = 0.5092, lr_0 = 6.7567e-04
Loss = 3.5536e-02, PNorm = 133.4176, GNorm = 0.3138, lr_0 = 6.7520e-04
Loss = 4.0273e-02, PNorm = 133.4856, GNorm = 0.6171, lr_0 = 6.7474e-04
Loss = 3.4691e-02, PNorm = 133.5531, GNorm = 0.5214, lr_0 = 6.7428e-04
Loss = 3.1956e-02, PNorm = 133.6183, GNorm = 0.5341, lr_0 = 6.7382e-04
Loss = 3.4307e-02, PNorm = 133.6841, GNorm = 0.5932, lr_0 = 6.7335e-04
Loss = 3.3593e-02, PNorm = 133.7585, GNorm = 0.8347, lr_0 = 6.7289e-04
Loss = 3.2062e-02, PNorm = 133.8185, GNorm = 0.4819, lr_0 = 6.7243e-04
Loss = 3.2099e-02, PNorm = 133.8844, GNorm = 0.6597, lr_0 = 6.7197e-04
Loss = 3.9087e-02, PNorm = 133.9471, GNorm = 0.2753, lr_0 = 6.7151e-04
Loss = 4.0472e-02, PNorm = 134.0184, GNorm = 0.7798, lr_0 = 6.7105e-04
Loss = 3.3141e-02, PNorm = 134.0916, GNorm = 0.3402, lr_0 = 6.7059e-04
Loss = 3.8609e-02, PNorm = 134.1663, GNorm = 0.2357, lr_0 = 6.7013e-04
Loss = 3.0569e-02, PNorm = 134.2335, GNorm = 0.4130, lr_0 = 6.6967e-04
Loss = 3.7782e-02, PNorm = 134.3002, GNorm = 0.6046, lr_0 = 6.6921e-04
Loss = 3.4184e-02, PNorm = 134.3755, GNorm = 0.3030, lr_0 = 6.6876e-04
Loss = 3.5724e-02, PNorm = 134.4545, GNorm = 0.4938, lr_0 = 6.6830e-04
Loss = 3.7490e-02, PNorm = 134.5205, GNorm = 0.4663, lr_0 = 6.6784e-04
Loss = 3.7815e-02, PNorm = 134.6013, GNorm = 0.4075, lr_0 = 6.6738e-04
Loss = 4.0491e-02, PNorm = 134.6713, GNorm = 0.2331, lr_0 = 6.6693e-04
Loss = 3.5263e-02, PNorm = 134.7507, GNorm = 0.5004, lr_0 = 6.6647e-04
Loss = 3.7091e-02, PNorm = 134.8235, GNorm = 0.4226, lr_0 = 6.6601e-04
Loss = 3.9585e-02, PNorm = 134.9007, GNorm = 0.5013, lr_0 = 6.6556e-04
Loss = 3.9021e-02, PNorm = 134.9776, GNorm = 0.2255, lr_0 = 6.6510e-04
Loss = 4.6443e-02, PNorm = 135.0525, GNorm = 0.3324, lr_0 = 6.6464e-04
Loss = 3.6732e-02, PNorm = 135.1308, GNorm = 0.4803, lr_0 = 6.6419e-04
Loss = 3.4996e-02, PNorm = 135.2110, GNorm = 0.6333, lr_0 = 6.6373e-04
Loss = 3.7147e-02, PNorm = 135.2896, GNorm = 0.3588, lr_0 = 6.6328e-04
Loss = 5.0966e-02, PNorm = 135.3619, GNorm = 0.4782, lr_0 = 6.6282e-04
Validation mae = 0.286628
Epoch 7
Loss = 2.9562e-02, PNorm = 135.4278, GNorm = 0.2510, lr_0 = 6.6237e-04
Loss = 2.7919e-02, PNorm = 135.4794, GNorm = 0.5088, lr_0 = 6.6192e-04
Loss = 2.8567e-02, PNorm = 135.5248, GNorm = 0.3931, lr_0 = 6.6146e-04
Loss = 2.9928e-02, PNorm = 135.5636, GNorm = 0.3026, lr_0 = 6.6101e-04
Loss = 2.9417e-02, PNorm = 135.6084, GNorm = 0.3885, lr_0 = 6.6056e-04
Loss = 3.0590e-02, PNorm = 135.6583, GNorm = 0.3465, lr_0 = 6.6011e-04
Loss = 3.2793e-02, PNorm = 135.7096, GNorm = 0.5339, lr_0 = 6.5965e-04
Loss = 2.9507e-02, PNorm = 135.7561, GNorm = 0.8546, lr_0 = 6.5920e-04
Loss = 2.8565e-02, PNorm = 135.8051, GNorm = 0.3405, lr_0 = 6.5875e-04
Loss = 2.6749e-02, PNorm = 135.8544, GNorm = 0.7302, lr_0 = 6.5830e-04
Loss = 2.6097e-02, PNorm = 135.9062, GNorm = 0.2713, lr_0 = 6.5785e-04
Loss = 2.7842e-02, PNorm = 135.9561, GNorm = 0.2084, lr_0 = 6.5740e-04
Loss = 2.7810e-02, PNorm = 136.0051, GNorm = 0.3241, lr_0 = 6.5695e-04
Loss = 2.7859e-02, PNorm = 136.0494, GNorm = 0.3643, lr_0 = 6.5650e-04
Loss = 2.8215e-02, PNorm = 136.1010, GNorm = 0.2042, lr_0 = 6.5605e-04
Loss = 2.5650e-02, PNorm = 136.1548, GNorm = 0.9201, lr_0 = 6.5560e-04
Loss = 2.8741e-02, PNorm = 136.2012, GNorm = 0.6127, lr_0 = 6.5515e-04
Loss = 3.0448e-02, PNorm = 136.2417, GNorm = 0.4987, lr_0 = 6.5470e-04
Loss = 2.3184e-02, PNorm = 136.2915, GNorm = 0.2995, lr_0 = 6.5425e-04
Loss = 2.7219e-02, PNorm = 136.3389, GNorm = 0.7152, lr_0 = 6.5380e-04
Loss = 2.5869e-02, PNorm = 136.3898, GNorm = 0.2181, lr_0 = 6.5335e-04
Loss = 2.8159e-02, PNorm = 136.4316, GNorm = 0.7930, lr_0 = 6.5291e-04
Loss = 3.0790e-02, PNorm = 136.4837, GNorm = 0.5290, lr_0 = 6.5246e-04
Loss = 2.5402e-02, PNorm = 136.5340, GNorm = 0.2276, lr_0 = 6.5201e-04
Loss = 2.5821e-02, PNorm = 136.5791, GNorm = 0.2420, lr_0 = 6.5157e-04
Loss = 2.7650e-02, PNorm = 136.6209, GNorm = 0.4113, lr_0 = 6.5112e-04
Loss = 2.4153e-02, PNorm = 136.6728, GNorm = 0.5731, lr_0 = 6.5067e-04
Loss = 2.4232e-02, PNorm = 136.7191, GNorm = 0.1787, lr_0 = 6.5023e-04
Loss = 2.6481e-02, PNorm = 136.7639, GNorm = 0.3324, lr_0 = 6.4978e-04
Loss = 2.0385e-02, PNorm = 136.8087, GNorm = 0.5460, lr_0 = 6.4934e-04
Loss = 2.4622e-02, PNorm = 136.8596, GNorm = 0.3224, lr_0 = 6.4889e-04
Loss = 2.8500e-02, PNorm = 136.9025, GNorm = 0.5214, lr_0 = 6.4845e-04
Loss = 2.3314e-02, PNorm = 136.9402, GNorm = 0.2765, lr_0 = 6.4800e-04
Loss = 2.2220e-02, PNorm = 136.9798, GNorm = 0.6099, lr_0 = 6.4756e-04
Loss = 2.7847e-02, PNorm = 137.0208, GNorm = 0.3903, lr_0 = 6.4712e-04
Loss = 2.3572e-02, PNorm = 137.0612, GNorm = 0.5476, lr_0 = 6.4667e-04
Loss = 2.5325e-02, PNorm = 137.1051, GNorm = 0.6020, lr_0 = 6.4623e-04
Loss = 2.6001e-02, PNorm = 137.1549, GNorm = 0.2465, lr_0 = 6.4579e-04
Loss = 2.9485e-02, PNorm = 137.2089, GNorm = 0.5620, lr_0 = 6.4534e-04
Loss = 2.8730e-02, PNorm = 137.2743, GNorm = 0.7003, lr_0 = 6.4490e-04
Loss = 2.8229e-02, PNorm = 137.3244, GNorm = 0.9481, lr_0 = 6.4446e-04
Loss = 2.7093e-02, PNorm = 137.3791, GNorm = 0.2894, lr_0 = 6.4402e-04
Loss = 3.0632e-02, PNorm = 137.4385, GNorm = 0.4858, lr_0 = 6.4358e-04
Loss = 2.2113e-02, PNorm = 137.4863, GNorm = 0.2765, lr_0 = 6.4314e-04
Loss = 2.4286e-02, PNorm = 137.5361, GNorm = 0.5635, lr_0 = 6.4270e-04
Loss = 2.6554e-02, PNorm = 137.5820, GNorm = 0.4553, lr_0 = 6.4226e-04
Loss = 2.1742e-02, PNorm = 137.6333, GNorm = 0.3988, lr_0 = 6.4182e-04
Loss = 2.5480e-02, PNorm = 137.6849, GNorm = 0.8176, lr_0 = 6.4138e-04
Loss = 2.6210e-02, PNorm = 137.7398, GNorm = 0.3687, lr_0 = 6.4094e-04
Loss = 2.7623e-02, PNorm = 137.7897, GNorm = 0.1941, lr_0 = 6.4050e-04
Loss = 2.3437e-02, PNorm = 137.8370, GNorm = 0.3120, lr_0 = 6.4006e-04
Loss = 2.3519e-02, PNorm = 137.8803, GNorm = 0.2585, lr_0 = 6.3962e-04
Loss = 3.2378e-02, PNorm = 137.9231, GNorm = 0.3922, lr_0 = 6.3918e-04
Loss = 2.8838e-02, PNorm = 137.9755, GNorm = 0.5944, lr_0 = 6.3874e-04
Loss = 3.1450e-02, PNorm = 138.0309, GNorm = 0.2428, lr_0 = 6.3831e-04
Loss = 2.5372e-02, PNorm = 138.0842, GNorm = 0.5451, lr_0 = 6.3787e-04
Loss = 2.8682e-02, PNorm = 138.1361, GNorm = 0.3778, lr_0 = 6.3743e-04
Loss = 2.7853e-02, PNorm = 138.1931, GNorm = 0.3312, lr_0 = 6.3700e-04
Loss = 2.5521e-02, PNorm = 138.2403, GNorm = 0.5735, lr_0 = 6.3656e-04
Loss = 2.4998e-02, PNorm = 138.2928, GNorm = 0.1515, lr_0 = 6.3612e-04
Loss = 2.5294e-02, PNorm = 138.3357, GNorm = 0.2730, lr_0 = 6.3569e-04
Loss = 3.4389e-02, PNorm = 138.3889, GNorm = 0.2937, lr_0 = 6.3525e-04
Loss = 2.8643e-02, PNorm = 138.4447, GNorm = 0.4690, lr_0 = 6.3482e-04
Loss = 2.9118e-02, PNorm = 138.5013, GNorm = 0.3467, lr_0 = 6.3438e-04
Loss = 2.6184e-02, PNorm = 138.5532, GNorm = 0.3909, lr_0 = 6.3395e-04
Loss = 2.6599e-02, PNorm = 138.6064, GNorm = 0.3561, lr_0 = 6.3351e-04
Loss = 2.8366e-02, PNorm = 138.6592, GNorm = 0.6305, lr_0 = 6.3308e-04
Loss = 2.7833e-02, PNorm = 138.7153, GNorm = 0.4102, lr_0 = 6.3265e-04
Loss = 2.3053e-02, PNorm = 138.7752, GNorm = 0.3580, lr_0 = 6.3221e-04
Loss = 2.7504e-02, PNorm = 138.8280, GNorm = 0.4288, lr_0 = 6.3178e-04
Loss = 2.2710e-02, PNorm = 138.8830, GNorm = 0.2435, lr_0 = 6.3135e-04
Loss = 2.9147e-02, PNorm = 138.9355, GNorm = 0.2976, lr_0 = 6.3091e-04
Loss = 3.2716e-02, PNorm = 138.9875, GNorm = 0.2772, lr_0 = 6.3048e-04
Loss = 2.8032e-02, PNorm = 139.0418, GNorm = 0.3307, lr_0 = 6.3005e-04
Loss = 2.8364e-02, PNorm = 139.0952, GNorm = 0.3065, lr_0 = 6.2962e-04
Loss = 3.1455e-02, PNorm = 139.1506, GNorm = 0.4183, lr_0 = 6.2919e-04
Loss = 3.0939e-02, PNorm = 139.2135, GNorm = 0.2376, lr_0 = 6.2876e-04
Loss = 2.6978e-02, PNorm = 139.2764, GNorm = 0.5583, lr_0 = 6.2833e-04
Loss = 2.4713e-02, PNorm = 139.3424, GNorm = 0.6143, lr_0 = 6.2789e-04
Loss = 2.6669e-02, PNorm = 139.3989, GNorm = 0.2317, lr_0 = 6.2746e-04
Loss = 2.6213e-02, PNorm = 139.4547, GNorm = 0.2275, lr_0 = 6.2703e-04
Loss = 2.9634e-02, PNorm = 139.5096, GNorm = 0.5356, lr_0 = 6.2661e-04
Loss = 2.3879e-02, PNorm = 139.5681, GNorm = 0.3683, lr_0 = 6.2618e-04
Loss = 2.4573e-02, PNorm = 139.6191, GNorm = 0.2010, lr_0 = 6.2575e-04
Loss = 2.5260e-02, PNorm = 139.6660, GNorm = 0.3179, lr_0 = 6.2532e-04
Loss = 2.5730e-02, PNorm = 139.7110, GNorm = 0.4402, lr_0 = 6.2489e-04
Loss = 3.3638e-02, PNorm = 139.7717, GNorm = 0.2523, lr_0 = 6.2446e-04
Loss = 3.1479e-02, PNorm = 139.8306, GNorm = 0.5413, lr_0 = 6.2403e-04
Loss = 3.0759e-02, PNorm = 139.8928, GNorm = 0.4961, lr_0 = 6.2361e-04
Loss = 2.8766e-02, PNorm = 139.9536, GNorm = 0.3293, lr_0 = 6.2318e-04
Loss = 2.4568e-02, PNorm = 140.0163, GNorm = 0.2588, lr_0 = 6.2275e-04
Loss = 3.1238e-02, PNorm = 140.0748, GNorm = 0.6101, lr_0 = 6.2233e-04
Loss = 3.0554e-02, PNorm = 140.1370, GNorm = 0.8440, lr_0 = 6.2190e-04
Loss = 3.0392e-02, PNorm = 140.1971, GNorm = 0.4383, lr_0 = 6.2147e-04
Loss = 3.0708e-02, PNorm = 140.2640, GNorm = 0.6909, lr_0 = 6.2105e-04
Loss = 2.8054e-02, PNorm = 140.3263, GNorm = 0.2712, lr_0 = 6.2062e-04
Loss = 2.5574e-02, PNorm = 140.3898, GNorm = 0.2169, lr_0 = 6.2020e-04
Loss = 2.3498e-02, PNorm = 140.4482, GNorm = 0.4223, lr_0 = 6.1977e-04
Loss = 2.9652e-02, PNorm = 140.5023, GNorm = 0.4081, lr_0 = 6.1935e-04
Loss = 2.7058e-02, PNorm = 140.5637, GNorm = 0.3014, lr_0 = 6.1892e-04
Loss = 3.0860e-02, PNorm = 140.6253, GNorm = 0.2027, lr_0 = 6.1850e-04
Loss = 2.5150e-02, PNorm = 140.6876, GNorm = 0.2760, lr_0 = 6.1808e-04
Loss = 2.9723e-02, PNorm = 140.7526, GNorm = 0.2348, lr_0 = 6.1765e-04
Loss = 2.6313e-02, PNorm = 140.8154, GNorm = 0.5460, lr_0 = 6.1723e-04
Loss = 2.7584e-02, PNorm = 140.8736, GNorm = 0.3195, lr_0 = 6.1681e-04
Loss = 3.1113e-02, PNorm = 140.9293, GNorm = 0.6591, lr_0 = 6.1638e-04
Loss = 2.2648e-02, PNorm = 140.9947, GNorm = 0.2619, lr_0 = 6.1596e-04
Loss = 3.0668e-02, PNorm = 141.0527, GNorm = 0.1899, lr_0 = 6.1554e-04
Loss = 2.8781e-02, PNorm = 141.1167, GNorm = 0.6650, lr_0 = 6.1512e-04
Loss = 2.6289e-02, PNorm = 141.1759, GNorm = 0.3602, lr_0 = 6.1470e-04
Loss = 2.9062e-02, PNorm = 141.2284, GNorm = 0.4615, lr_0 = 6.1428e-04
Loss = 2.8309e-02, PNorm = 141.2952, GNorm = 0.5480, lr_0 = 6.1385e-04
Loss = 2.8551e-02, PNorm = 141.3562, GNorm = 0.2168, lr_0 = 6.1343e-04
Loss = 2.7187e-02, PNorm = 141.4161, GNorm = 0.2453, lr_0 = 6.1301e-04
Loss = 2.7111e-02, PNorm = 141.4718, GNorm = 0.5703, lr_0 = 6.1259e-04
Loss = 3.2851e-02, PNorm = 141.5285, GNorm = 0.3182, lr_0 = 6.1217e-04
Loss = 2.6412e-02, PNorm = 141.5871, GNorm = 0.3770, lr_0 = 6.1175e-04
Loss = 2.7622e-02, PNorm = 141.6447, GNorm = 0.5668, lr_0 = 6.1134e-04
Loss = 2.6621e-02, PNorm = 141.6985, GNorm = 0.4196, lr_0 = 6.1092e-04
Loss = 3.0823e-02, PNorm = 141.7494, GNorm = 0.4415, lr_0 = 6.1050e-04
Validation mae = 0.284843
Epoch 8
Loss = 2.2804e-02, PNorm = 141.7958, GNorm = 0.7274, lr_0 = 6.1008e-04
Loss = 2.0208e-02, PNorm = 141.8259, GNorm = 0.4731, lr_0 = 6.0966e-04
Loss = 2.5405e-02, PNorm = 141.8670, GNorm = 0.3172, lr_0 = 6.0924e-04
Loss = 2.2541e-02, PNorm = 141.9032, GNorm = 0.5237, lr_0 = 6.0883e-04
Loss = 2.3386e-02, PNorm = 141.9422, GNorm = 0.4931, lr_0 = 6.0841e-04
Loss = 2.3213e-02, PNorm = 141.9839, GNorm = 0.5638, lr_0 = 6.0799e-04
Loss = 2.1979e-02, PNorm = 142.0242, GNorm = 0.6661, lr_0 = 6.0758e-04
Loss = 2.5644e-02, PNorm = 142.0608, GNorm = 0.2343, lr_0 = 6.0716e-04
Loss = 2.3399e-02, PNorm = 142.1039, GNorm = 0.7503, lr_0 = 6.0674e-04
Loss = 2.5537e-02, PNorm = 142.1470, GNorm = 0.2611, lr_0 = 6.0633e-04
Loss = 2.3215e-02, PNorm = 142.1895, GNorm = 0.3112, lr_0 = 6.0591e-04
Loss = 2.3994e-02, PNorm = 142.2225, GNorm = 0.6134, lr_0 = 6.0550e-04
Loss = 2.4759e-02, PNorm = 142.2684, GNorm = 0.4133, lr_0 = 6.0508e-04
Loss = 2.0500e-02, PNorm = 142.3093, GNorm = 0.3519, lr_0 = 6.0467e-04
Loss = 2.0284e-02, PNorm = 142.3500, GNorm = 0.9617, lr_0 = 6.0425e-04
Loss = 2.5272e-02, PNorm = 142.3849, GNorm = 0.6862, lr_0 = 6.0384e-04
Loss = 2.3721e-02, PNorm = 142.4317, GNorm = 0.3033, lr_0 = 6.0343e-04
Loss = 2.0381e-02, PNorm = 142.4673, GNorm = 0.3298, lr_0 = 6.0301e-04
Loss = 2.1993e-02, PNorm = 142.5063, GNorm = 0.5773, lr_0 = 6.0260e-04
Loss = 2.0361e-02, PNorm = 142.5372, GNorm = 0.3130, lr_0 = 6.0219e-04
Loss = 2.2486e-02, PNorm = 142.5800, GNorm = 0.3716, lr_0 = 6.0178e-04
Loss = 2.3038e-02, PNorm = 142.6184, GNorm = 0.4025, lr_0 = 6.0136e-04
Loss = 2.2066e-02, PNorm = 142.6617, GNorm = 0.8931, lr_0 = 6.0095e-04
Loss = 2.2791e-02, PNorm = 142.7050, GNorm = 0.6028, lr_0 = 6.0054e-04
Loss = 2.2022e-02, PNorm = 142.7510, GNorm = 0.8475, lr_0 = 6.0013e-04
Loss = 1.7279e-02, PNorm = 142.7915, GNorm = 0.2822, lr_0 = 5.9972e-04
Loss = 1.9418e-02, PNorm = 142.8311, GNorm = 0.2554, lr_0 = 5.9931e-04
Loss = 1.8237e-02, PNorm = 142.8691, GNorm = 0.1439, lr_0 = 5.9890e-04
Loss = 1.8556e-02, PNorm = 142.9000, GNorm = 0.2796, lr_0 = 5.9849e-04
Loss = 2.1520e-02, PNorm = 142.9341, GNorm = 0.4487, lr_0 = 5.9808e-04
Loss = 2.1773e-02, PNorm = 142.9763, GNorm = 0.3583, lr_0 = 5.9767e-04
Loss = 2.2418e-02, PNorm = 143.0164, GNorm = 0.5010, lr_0 = 5.9726e-04
Loss = 2.3057e-02, PNorm = 143.0622, GNorm = 0.3812, lr_0 = 5.9685e-04
Loss = 2.1523e-02, PNorm = 143.1054, GNorm = 0.5466, lr_0 = 5.9644e-04
Loss = 1.8869e-02, PNorm = 143.1451, GNorm = 0.3805, lr_0 = 5.9603e-04
Loss = 2.1574e-02, PNorm = 143.1878, GNorm = 0.6092, lr_0 = 5.9562e-04
Loss = 2.1903e-02, PNorm = 143.2314, GNorm = 0.1985, lr_0 = 5.9521e-04
Loss = 1.7930e-02, PNorm = 143.2735, GNorm = 0.3806, lr_0 = 5.9481e-04
Loss = 2.1575e-02, PNorm = 143.3075, GNorm = 0.2477, lr_0 = 5.9440e-04
Loss = 2.0309e-02, PNorm = 143.3489, GNorm = 0.5500, lr_0 = 5.9399e-04
Loss = 2.1641e-02, PNorm = 143.3888, GNorm = 0.5286, lr_0 = 5.9358e-04
Loss = 2.3889e-02, PNorm = 143.4283, GNorm = 0.1599, lr_0 = 5.9318e-04
Loss = 2.1758e-02, PNorm = 143.4717, GNorm = 0.1849, lr_0 = 5.9277e-04
Loss = 2.0295e-02, PNorm = 143.5094, GNorm = 0.2089, lr_0 = 5.9236e-04
Loss = 2.0641e-02, PNorm = 143.5538, GNorm = 0.5263, lr_0 = 5.9196e-04
Loss = 2.4323e-02, PNorm = 143.5938, GNorm = 0.2293, lr_0 = 5.9155e-04
Loss = 2.1400e-02, PNorm = 143.6420, GNorm = 0.3924, lr_0 = 5.9115e-04
Loss = 2.0459e-02, PNorm = 143.6820, GNorm = 0.3606, lr_0 = 5.9074e-04
Loss = 2.5129e-02, PNorm = 143.7214, GNorm = 0.5084, lr_0 = 5.9034e-04
Loss = 2.1535e-02, PNorm = 143.7616, GNorm = 0.4051, lr_0 = 5.8993e-04
Loss = 1.9743e-02, PNorm = 143.8048, GNorm = 0.2177, lr_0 = 5.8953e-04
Loss = 2.1448e-02, PNorm = 143.8448, GNorm = 0.3063, lr_0 = 5.8913e-04
Loss = 2.2601e-02, PNorm = 143.8888, GNorm = 0.3232, lr_0 = 5.8872e-04
Loss = 2.2884e-02, PNorm = 143.9289, GNorm = 0.5442, lr_0 = 5.8832e-04
Loss = 2.0906e-02, PNorm = 143.9689, GNorm = 0.2150, lr_0 = 5.8792e-04
Loss = 2.2684e-02, PNorm = 144.0095, GNorm = 0.2352, lr_0 = 5.8751e-04
Loss = 2.1498e-02, PNorm = 144.0554, GNorm = 0.1407, lr_0 = 5.8711e-04
Loss = 2.5520e-02, PNorm = 144.1023, GNorm = 0.2603, lr_0 = 5.8671e-04
Loss = 2.0689e-02, PNorm = 144.1491, GNorm = 0.7218, lr_0 = 5.8631e-04
Loss = 2.2730e-02, PNorm = 144.1952, GNorm = 0.2591, lr_0 = 5.8591e-04
Loss = 2.2636e-02, PNorm = 144.2372, GNorm = 0.8358, lr_0 = 5.8550e-04
Loss = 1.8897e-02, PNorm = 144.2825, GNorm = 0.4074, lr_0 = 5.8510e-04
Loss = 2.1636e-02, PNorm = 144.3217, GNorm = 0.4802, lr_0 = 5.8470e-04
Loss = 1.6881e-02, PNorm = 144.3657, GNorm = 0.4740, lr_0 = 5.8430e-04
Loss = 1.7519e-02, PNorm = 144.4051, GNorm = 0.3714, lr_0 = 5.8390e-04
Loss = 2.3402e-02, PNorm = 144.4552, GNorm = 0.3851, lr_0 = 5.8350e-04
Loss = 2.2736e-02, PNorm = 144.4994, GNorm = 0.5820, lr_0 = 5.8310e-04
Loss = 2.1694e-02, PNorm = 144.5465, GNorm = 0.4990, lr_0 = 5.8270e-04
Loss = 1.8561e-02, PNorm = 144.5923, GNorm = 0.3005, lr_0 = 5.8230e-04
Loss = 2.0606e-02, PNorm = 144.6359, GNorm = 0.5667, lr_0 = 5.8190e-04
Loss = 1.7470e-02, PNorm = 144.6801, GNorm = 0.2130, lr_0 = 5.8151e-04
Loss = 1.8782e-02, PNorm = 144.7156, GNorm = 0.5135, lr_0 = 5.8111e-04
Loss = 2.2095e-02, PNorm = 144.7564, GNorm = 0.4613, lr_0 = 5.8071e-04
Loss = 1.9386e-02, PNorm = 144.7977, GNorm = 0.6234, lr_0 = 5.8031e-04
Loss = 2.2225e-02, PNorm = 144.8356, GNorm = 0.1957, lr_0 = 5.7991e-04
Loss = 2.3850e-02, PNorm = 144.8799, GNorm = 0.6976, lr_0 = 5.7952e-04
Loss = 2.2487e-02, PNorm = 144.9323, GNorm = 0.2471, lr_0 = 5.7912e-04
Loss = 1.9612e-02, PNorm = 144.9737, GNorm = 0.2172, lr_0 = 5.7872e-04
Loss = 1.9169e-02, PNorm = 145.0222, GNorm = 0.3036, lr_0 = 5.7833e-04
Loss = 2.2554e-02, PNorm = 145.0628, GNorm = 0.1754, lr_0 = 5.7793e-04
Loss = 1.9709e-02, PNorm = 145.1110, GNorm = 0.2634, lr_0 = 5.7753e-04
Loss = 2.2182e-02, PNorm = 145.1562, GNorm = 0.9898, lr_0 = 5.7714e-04
Loss = 1.7909e-02, PNorm = 145.1965, GNorm = 0.6037, lr_0 = 5.7674e-04
Loss = 2.6300e-02, PNorm = 145.2440, GNorm = 0.3172, lr_0 = 5.7635e-04
Loss = 2.4820e-02, PNorm = 145.2970, GNorm = 0.5994, lr_0 = 5.7595e-04
Loss = 2.3431e-02, PNorm = 145.3518, GNorm = 0.4627, lr_0 = 5.7556e-04
Loss = 2.4133e-02, PNorm = 145.4045, GNorm = 0.4660, lr_0 = 5.7516e-04
Loss = 2.1291e-02, PNorm = 145.4606, GNorm = 0.2130, lr_0 = 5.7477e-04
Loss = 1.6301e-02, PNorm = 145.5126, GNorm = 0.3157, lr_0 = 5.7438e-04
Loss = 2.5120e-02, PNorm = 145.5609, GNorm = 0.2608, lr_0 = 5.7398e-04
Loss = 2.1747e-02, PNorm = 145.6113, GNorm = 0.5920, lr_0 = 5.7359e-04
Loss = 2.2002e-02, PNorm = 145.6632, GNorm = 0.3418, lr_0 = 5.7320e-04
Loss = 2.3427e-02, PNorm = 145.7098, GNorm = 0.3941, lr_0 = 5.7280e-04
Loss = 2.7076e-02, PNorm = 145.7600, GNorm = 0.8717, lr_0 = 5.7241e-04
Loss = 2.1128e-02, PNorm = 145.8163, GNorm = 0.2274, lr_0 = 5.7202e-04
Loss = 2.0569e-02, PNorm = 145.8625, GNorm = 0.5119, lr_0 = 5.7163e-04
Loss = 2.1999e-02, PNorm = 145.9142, GNorm = 0.7163, lr_0 = 5.7124e-04
Loss = 2.3873e-02, PNorm = 145.9612, GNorm = 0.6716, lr_0 = 5.7084e-04
Loss = 2.8752e-02, PNorm = 146.0140, GNorm = 0.1990, lr_0 = 5.7045e-04
Loss = 2.5227e-02, PNorm = 146.0700, GNorm = 0.2483, lr_0 = 5.7006e-04
Loss = 2.3022e-02, PNorm = 146.1229, GNorm = 0.3603, lr_0 = 5.6967e-04
Loss = 2.4863e-02, PNorm = 146.1703, GNorm = 0.3745, lr_0 = 5.6928e-04
Loss = 2.0515e-02, PNorm = 146.2201, GNorm = 0.3539, lr_0 = 5.6889e-04
Loss = 2.3391e-02, PNorm = 146.2614, GNorm = 0.2695, lr_0 = 5.6850e-04
Loss = 2.2443e-02, PNorm = 146.3121, GNorm = 0.4927, lr_0 = 5.6811e-04
Loss = 2.7743e-02, PNorm = 146.3683, GNorm = 0.3079, lr_0 = 5.6772e-04
Loss = 1.7164e-02, PNorm = 146.4190, GNorm = 0.2385, lr_0 = 5.6733e-04
Loss = 2.3003e-02, PNorm = 146.4698, GNorm = 0.2463, lr_0 = 5.6695e-04
Loss = 2.0532e-02, PNorm = 146.5219, GNorm = 0.1776, lr_0 = 5.6656e-04
Loss = 2.1869e-02, PNorm = 146.5694, GNorm = 0.5439, lr_0 = 5.6617e-04
Loss = 2.0718e-02, PNorm = 146.6163, GNorm = 0.4717, lr_0 = 5.6578e-04
Loss = 2.7103e-02, PNorm = 146.6605, GNorm = 0.1802, lr_0 = 5.6539e-04
Loss = 1.9855e-02, PNorm = 146.7028, GNorm = 0.5422, lr_0 = 5.6501e-04
Loss = 2.4465e-02, PNorm = 146.7441, GNorm = 0.3288, lr_0 = 5.6462e-04
Loss = 2.7529e-02, PNorm = 146.7914, GNorm = 0.7033, lr_0 = 5.6423e-04
Loss = 2.1696e-02, PNorm = 146.8418, GNorm = 0.5151, lr_0 = 5.6385e-04
Loss = 2.0612e-02, PNorm = 146.8914, GNorm = 0.6824, lr_0 = 5.6346e-04
Loss = 2.2963e-02, PNorm = 146.9372, GNorm = 0.5218, lr_0 = 5.6307e-04
Loss = 2.2298e-02, PNorm = 146.9855, GNorm = 0.1638, lr_0 = 5.6269e-04
Loss = 2.2256e-02, PNorm = 147.0278, GNorm = 0.4470, lr_0 = 5.6230e-04
Validation mae = 0.282008
Epoch 9
Loss = 1.7116e-02, PNorm = 147.0651, GNorm = 0.2141, lr_0 = 5.6192e-04
Loss = 2.0931e-02, PNorm = 147.0973, GNorm = 0.2598, lr_0 = 5.6153e-04
Loss = 1.8440e-02, PNorm = 147.1336, GNorm = 0.4028, lr_0 = 5.6115e-04
Loss = 2.0921e-02, PNorm = 147.1706, GNorm = 0.5867, lr_0 = 5.6076e-04
Loss = 1.6823e-02, PNorm = 147.2032, GNorm = 0.4852, lr_0 = 5.6038e-04
Loss = 1.7556e-02, PNorm = 147.2352, GNorm = 0.5224, lr_0 = 5.6000e-04
Loss = 2.0630e-02, PNorm = 147.2696, GNorm = 0.2192, lr_0 = 5.5961e-04
Loss = 1.7076e-02, PNorm = 147.3030, GNorm = 0.4595, lr_0 = 5.5923e-04
Loss = 1.7190e-02, PNorm = 147.3346, GNorm = 0.7994, lr_0 = 5.5885e-04
Loss = 1.9864e-02, PNorm = 147.3715, GNorm = 0.2396, lr_0 = 5.5846e-04
Loss = 2.0122e-02, PNorm = 147.4049, GNorm = 0.4597, lr_0 = 5.5808e-04
Loss = 1.8109e-02, PNorm = 147.4411, GNorm = 0.2734, lr_0 = 5.5770e-04
Loss = 1.8464e-02, PNorm = 147.4784, GNorm = 0.1588, lr_0 = 5.5732e-04
Loss = 1.5931e-02, PNorm = 147.5118, GNorm = 0.4614, lr_0 = 5.5693e-04
Loss = 1.4764e-02, PNorm = 147.5440, GNorm = 0.7531, lr_0 = 5.5655e-04
Loss = 1.7361e-02, PNorm = 147.5716, GNorm = 0.2593, lr_0 = 5.5617e-04
Loss = 1.6817e-02, PNorm = 147.5986, GNorm = 0.5491, lr_0 = 5.5579e-04
Loss = 1.8919e-02, PNorm = 147.6244, GNorm = 0.4681, lr_0 = 5.5541e-04
Loss = 1.8556e-02, PNorm = 147.6586, GNorm = 0.5023, lr_0 = 5.5503e-04
Loss = 1.8638e-02, PNorm = 147.6965, GNorm = 0.3502, lr_0 = 5.5465e-04
Loss = 1.9083e-02, PNorm = 147.7362, GNorm = 0.6576, lr_0 = 5.5427e-04
Loss = 1.6017e-02, PNorm = 147.7727, GNorm = 0.2511, lr_0 = 5.5389e-04
Loss = 1.4363e-02, PNorm = 147.8055, GNorm = 0.3386, lr_0 = 5.5351e-04
Loss = 1.5115e-02, PNorm = 147.8356, GNorm = 0.2561, lr_0 = 5.5313e-04
Loss = 1.4227e-02, PNorm = 147.8684, GNorm = 0.2930, lr_0 = 5.5275e-04
Loss = 1.8653e-02, PNorm = 147.8990, GNorm = 0.2608, lr_0 = 5.5237e-04
Loss = 1.5122e-02, PNorm = 147.9289, GNorm = 0.3062, lr_0 = 5.5199e-04
Loss = 1.6262e-02, PNorm = 147.9631, GNorm = 0.1758, lr_0 = 5.5162e-04
Loss = 2.3158e-02, PNorm = 147.9973, GNorm = 0.4156, lr_0 = 5.5124e-04
Loss = 1.6152e-02, PNorm = 148.0318, GNorm = 0.2422, lr_0 = 5.5086e-04
Loss = 1.5023e-02, PNorm = 148.0684, GNorm = 0.2902, lr_0 = 5.5048e-04
Loss = 1.8317e-02, PNorm = 148.1005, GNorm = 0.5718, lr_0 = 5.5011e-04
Loss = 1.7714e-02, PNorm = 148.1349, GNorm = 0.2184, lr_0 = 5.4973e-04
Loss = 1.4608e-02, PNorm = 148.1677, GNorm = 0.4077, lr_0 = 5.4935e-04
Loss = 1.7776e-02, PNorm = 148.1962, GNorm = 0.5791, lr_0 = 5.4898e-04
Loss = 1.6079e-02, PNorm = 148.2276, GNorm = 0.3185, lr_0 = 5.4860e-04
Loss = 1.7268e-02, PNorm = 148.2624, GNorm = 0.5288, lr_0 = 5.4822e-04
Loss = 1.5022e-02, PNorm = 148.2991, GNorm = 0.3856, lr_0 = 5.4785e-04
Loss = 1.7053e-02, PNorm = 148.3320, GNorm = 0.4901, lr_0 = 5.4747e-04
Loss = 1.5069e-02, PNorm = 148.3613, GNorm = 0.2462, lr_0 = 5.4710e-04
Loss = 1.5535e-02, PNorm = 148.3902, GNorm = 0.1834, lr_0 = 5.4672e-04
Loss = 1.6239e-02, PNorm = 148.4228, GNorm = 0.1607, lr_0 = 5.4635e-04
Loss = 1.5696e-02, PNorm = 148.4524, GNorm = 0.3299, lr_0 = 5.4597e-04
Loss = 1.5768e-02, PNorm = 148.4882, GNorm = 0.2088, lr_0 = 5.4560e-04
Loss = 1.4644e-02, PNorm = 148.5214, GNorm = 0.3963, lr_0 = 5.4523e-04
Loss = 1.6387e-02, PNorm = 148.5556, GNorm = 0.1561, lr_0 = 5.4485e-04
Loss = 1.4953e-02, PNorm = 148.5913, GNorm = 0.2838, lr_0 = 5.4448e-04
Loss = 1.3131e-02, PNorm = 148.6199, GNorm = 0.2107, lr_0 = 5.4411e-04
Loss = 1.9046e-02, PNorm = 148.6514, GNorm = 0.2648, lr_0 = 5.4373e-04
Loss = 1.9869e-02, PNorm = 148.6854, GNorm = 0.6965, lr_0 = 5.4336e-04
Loss = 1.5201e-02, PNorm = 148.7175, GNorm = 0.4395, lr_0 = 5.4299e-04
Loss = 1.5770e-02, PNorm = 148.7534, GNorm = 0.3027, lr_0 = 5.4262e-04
Loss = 1.4194e-02, PNorm = 148.7892, GNorm = 0.2971, lr_0 = 5.4225e-04
Loss = 1.9733e-02, PNorm = 148.8260, GNorm = 0.1428, lr_0 = 5.4187e-04
Loss = 1.8157e-02, PNorm = 148.8577, GNorm = 0.2980, lr_0 = 5.4150e-04
Loss = 1.5986e-02, PNorm = 148.8942, GNorm = 0.2371, lr_0 = 5.4113e-04
Loss = 1.5268e-02, PNorm = 148.9264, GNorm = 0.4949, lr_0 = 5.4076e-04
Loss = 1.9166e-02, PNorm = 148.9662, GNorm = 0.3569, lr_0 = 5.4039e-04
Loss = 1.5521e-02, PNorm = 149.0064, GNorm = 0.2556, lr_0 = 5.4002e-04
Loss = 1.8638e-02, PNorm = 149.0427, GNorm = 0.5007, lr_0 = 5.3965e-04
Loss = 1.7406e-02, PNorm = 149.0833, GNorm = 0.1591, lr_0 = 5.3928e-04
Loss = 1.8379e-02, PNorm = 149.1270, GNorm = 0.1859, lr_0 = 5.3891e-04
Loss = 1.3841e-02, PNorm = 149.1632, GNorm = 0.1734, lr_0 = 5.3854e-04
Loss = 1.5844e-02, PNorm = 149.1986, GNorm = 0.6296, lr_0 = 5.3817e-04
Loss = 1.9281e-02, PNorm = 149.2301, GNorm = 0.2450, lr_0 = 5.3781e-04
Loss = 1.6160e-02, PNorm = 149.2687, GNorm = 0.4098, lr_0 = 5.3744e-04
Loss = 1.8134e-02, PNorm = 149.3049, GNorm = 0.2774, lr_0 = 5.3707e-04
Loss = 1.8426e-02, PNorm = 149.3465, GNorm = 0.2819, lr_0 = 5.3670e-04
Loss = 1.9770e-02, PNorm = 149.3842, GNorm = 0.3733, lr_0 = 5.3633e-04
Loss = 1.8675e-02, PNorm = 149.4276, GNorm = 0.5855, lr_0 = 5.3597e-04
Loss = 1.9079e-02, PNorm = 149.4693, GNorm = 0.6282, lr_0 = 5.3560e-04
Loss = 1.7845e-02, PNorm = 149.5145, GNorm = 0.2685, lr_0 = 5.3523e-04
Loss = 1.7190e-02, PNorm = 149.5542, GNorm = 0.3349, lr_0 = 5.3486e-04
Loss = 1.7211e-02, PNorm = 149.5939, GNorm = 0.5140, lr_0 = 5.3450e-04
Loss = 1.4597e-02, PNorm = 149.6335, GNorm = 0.1758, lr_0 = 5.3413e-04
Loss = 1.6614e-02, PNorm = 149.6728, GNorm = 0.3079, lr_0 = 5.3377e-04
Loss = 2.0447e-02, PNorm = 149.7114, GNorm = 0.2167, lr_0 = 5.3340e-04
Loss = 1.5826e-02, PNorm = 149.7511, GNorm = 0.2309, lr_0 = 5.3304e-04
Loss = 1.5187e-02, PNorm = 149.7843, GNorm = 1.0466, lr_0 = 5.3267e-04
Loss = 1.9545e-02, PNorm = 149.8201, GNorm = 0.2195, lr_0 = 5.3231e-04
Loss = 2.1369e-02, PNorm = 149.8622, GNorm = 0.2960, lr_0 = 5.3194e-04
Loss = 1.7561e-02, PNorm = 149.9061, GNorm = 0.6058, lr_0 = 5.3158e-04
Loss = 1.8597e-02, PNorm = 149.9506, GNorm = 0.3234, lr_0 = 5.3121e-04
Loss = 1.6450e-02, PNorm = 149.9924, GNorm = 0.4170, lr_0 = 5.3085e-04
Loss = 1.7596e-02, PNorm = 150.0303, GNorm = 0.5844, lr_0 = 5.3048e-04
Loss = 1.9368e-02, PNorm = 150.0682, GNorm = 0.3521, lr_0 = 5.3012e-04
Loss = 1.5887e-02, PNorm = 150.1032, GNorm = 0.7021, lr_0 = 5.2976e-04
Loss = 1.8455e-02, PNorm = 150.1373, GNorm = 0.3613, lr_0 = 5.2939e-04
Loss = 1.6722e-02, PNorm = 150.1739, GNorm = 0.2619, lr_0 = 5.2903e-04
Loss = 1.8359e-02, PNorm = 150.2124, GNorm = 0.3754, lr_0 = 5.2867e-04
Loss = 2.3210e-02, PNorm = 150.2444, GNorm = 0.3828, lr_0 = 5.2831e-04
Loss = 1.8427e-02, PNorm = 150.2866, GNorm = 0.4040, lr_0 = 5.2795e-04
Loss = 1.5064e-02, PNorm = 150.3324, GNorm = 0.2358, lr_0 = 5.2758e-04
Loss = 1.9079e-02, PNorm = 150.3718, GNorm = 0.2556, lr_0 = 5.2722e-04
Loss = 1.8596e-02, PNorm = 150.4166, GNorm = 0.4322, lr_0 = 5.2686e-04
Loss = 1.6576e-02, PNorm = 150.4622, GNorm = 0.4885, lr_0 = 5.2650e-04
Loss = 1.9998e-02, PNorm = 150.5046, GNorm = 0.4111, lr_0 = 5.2614e-04
Loss = 2.0464e-02, PNorm = 150.5472, GNorm = 0.2410, lr_0 = 5.2578e-04
Loss = 1.9410e-02, PNorm = 150.5893, GNorm = 0.4227, lr_0 = 5.2542e-04
Loss = 1.6123e-02, PNorm = 150.6327, GNorm = 0.3772, lr_0 = 5.2506e-04
Loss = 1.6920e-02, PNorm = 150.6741, GNorm = 0.4374, lr_0 = 5.2470e-04
Loss = 1.6707e-02, PNorm = 150.7133, GNorm = 0.4654, lr_0 = 5.2434e-04
Loss = 1.5679e-02, PNorm = 150.7527, GNorm = 0.6244, lr_0 = 5.2398e-04
Loss = 1.9366e-02, PNorm = 150.7919, GNorm = 0.5351, lr_0 = 5.2362e-04
Loss = 2.0964e-02, PNorm = 150.8333, GNorm = 0.6566, lr_0 = 5.2326e-04
Loss = 1.7985e-02, PNorm = 150.8815, GNorm = 0.8115, lr_0 = 5.2290e-04
Loss = 1.5959e-02, PNorm = 150.9286, GNorm = 0.2448, lr_0 = 5.2255e-04
Loss = 2.2217e-02, PNorm = 150.9661, GNorm = 0.5483, lr_0 = 5.2219e-04
Loss = 1.9682e-02, PNorm = 151.0102, GNorm = 0.3888, lr_0 = 5.2183e-04
Loss = 1.7269e-02, PNorm = 151.0516, GNorm = 0.2857, lr_0 = 5.2147e-04
Loss = 2.0501e-02, PNorm = 151.0931, GNorm = 0.2062, lr_0 = 5.2112e-04
Loss = 1.9426e-02, PNorm = 151.1426, GNorm = 0.6276, lr_0 = 5.2076e-04
Loss = 1.5180e-02, PNorm = 151.1856, GNorm = 0.3821, lr_0 = 5.2040e-04
Loss = 1.8504e-02, PNorm = 151.2269, GNorm = 0.6211, lr_0 = 5.2005e-04
Loss = 1.8399e-02, PNorm = 151.2642, GNorm = 0.2669, lr_0 = 5.1969e-04
Loss = 1.5031e-02, PNorm = 151.3020, GNorm = 0.4611, lr_0 = 5.1933e-04
Loss = 2.2450e-02, PNorm = 151.3465, GNorm = 0.3400, lr_0 = 5.1898e-04
Loss = 1.8471e-02, PNorm = 151.3917, GNorm = 0.2392, lr_0 = 5.1862e-04
Loss = 2.0273e-02, PNorm = 151.4362, GNorm = 0.3897, lr_0 = 5.1827e-04
Loss = 1.6831e-02, PNorm = 151.4802, GNorm = 0.2742, lr_0 = 5.1791e-04
Validation mae = 0.282966
Epoch 10
Loss = 1.4127e-02, PNorm = 151.5135, GNorm = 0.2928, lr_0 = 5.1756e-04
Loss = 1.4663e-02, PNorm = 151.5415, GNorm = 0.2338, lr_0 = 5.1720e-04
Loss = 1.5781e-02, PNorm = 151.5673, GNorm = 0.5811, lr_0 = 5.1685e-04
Loss = 1.5458e-02, PNorm = 151.5991, GNorm = 0.2619, lr_0 = 5.1649e-04
Loss = 1.4906e-02, PNorm = 151.6232, GNorm = 0.6492, lr_0 = 5.1614e-04
Loss = 1.7342e-02, PNorm = 151.6482, GNorm = 0.2178, lr_0 = 5.1579e-04
Loss = 1.5948e-02, PNorm = 151.6798, GNorm = 0.1940, lr_0 = 5.1543e-04
Loss = 1.3871e-02, PNorm = 151.7113, GNorm = 0.3396, lr_0 = 5.1508e-04
Loss = 1.3633e-02, PNorm = 151.7421, GNorm = 0.2784, lr_0 = 5.1473e-04
Loss = 1.4226e-02, PNorm = 151.7714, GNorm = 0.6192, lr_0 = 5.1437e-04
Loss = 1.3843e-02, PNorm = 151.7942, GNorm = 0.2856, lr_0 = 5.1402e-04
Loss = 1.6911e-02, PNorm = 151.8221, GNorm = 0.5793, lr_0 = 5.1367e-04
Loss = 1.4703e-02, PNorm = 151.8520, GNorm = 0.3404, lr_0 = 5.1332e-04
Loss = 1.4874e-02, PNorm = 151.8857, GNorm = 0.3658, lr_0 = 5.1297e-04
Loss = 1.1655e-02, PNorm = 151.9142, GNorm = 0.3343, lr_0 = 5.1262e-04
Loss = 1.2349e-02, PNorm = 151.9391, GNorm = 0.4283, lr_0 = 5.1226e-04
Loss = 1.6337e-02, PNorm = 151.9651, GNorm = 0.5547, lr_0 = 5.1191e-04
Loss = 1.8863e-02, PNorm = 151.9939, GNorm = 0.4544, lr_0 = 5.1156e-04
Loss = 1.6598e-02, PNorm = 152.0199, GNorm = 0.2623, lr_0 = 5.1121e-04
Loss = 1.6807e-02, PNorm = 152.0507, GNorm = 0.4047, lr_0 = 5.1086e-04
Loss = 1.4687e-02, PNorm = 152.0796, GNorm = 0.2430, lr_0 = 5.1051e-04
Loss = 1.4077e-02, PNorm = 152.1062, GNorm = 0.4184, lr_0 = 5.1016e-04
Loss = 1.3294e-02, PNorm = 152.1375, GNorm = 0.1380, lr_0 = 5.0981e-04
Loss = 1.6715e-02, PNorm = 152.1688, GNorm = 0.4546, lr_0 = 5.0946e-04
Loss = 1.4243e-02, PNorm = 152.1968, GNorm = 0.2765, lr_0 = 5.0911e-04
Loss = 1.4318e-02, PNorm = 152.2260, GNorm = 0.4504, lr_0 = 5.0877e-04
Loss = 1.4646e-02, PNorm = 152.2530, GNorm = 0.5495, lr_0 = 5.0842e-04
Loss = 1.2902e-02, PNorm = 152.2774, GNorm = 0.1948, lr_0 = 5.0807e-04
Loss = 1.3381e-02, PNorm = 152.3068, GNorm = 0.4903, lr_0 = 5.0772e-04
Loss = 1.3617e-02, PNorm = 152.3338, GNorm = 0.4065, lr_0 = 5.0737e-04
Loss = 1.4165e-02, PNorm = 152.3657, GNorm = 0.5238, lr_0 = 5.0703e-04
Loss = 1.8041e-02, PNorm = 152.3936, GNorm = 0.3563, lr_0 = 5.0668e-04
Loss = 1.2211e-02, PNorm = 152.4233, GNorm = 0.2077, lr_0 = 5.0633e-04
Loss = 1.1784e-02, PNorm = 152.4504, GNorm = 0.4055, lr_0 = 5.0598e-04
Loss = 1.3783e-02, PNorm = 152.4749, GNorm = 0.2001, lr_0 = 5.0564e-04
Loss = 1.3253e-02, PNorm = 152.5004, GNorm = 0.2764, lr_0 = 5.0529e-04
Loss = 1.4297e-02, PNorm = 152.5280, GNorm = 0.3656, lr_0 = 5.0494e-04
Loss = 1.4674e-02, PNorm = 152.5567, GNorm = 0.1410, lr_0 = 5.0460e-04
Loss = 1.2644e-02, PNorm = 152.5883, GNorm = 0.4207, lr_0 = 5.0425e-04
Loss = 1.4628e-02, PNorm = 152.6207, GNorm = 0.3353, lr_0 = 5.0391e-04
Loss = 1.5083e-02, PNorm = 152.6507, GNorm = 0.1827, lr_0 = 5.0356e-04
Loss = 1.5036e-02, PNorm = 152.6778, GNorm = 0.2162, lr_0 = 5.0322e-04
Loss = 1.3187e-02, PNorm = 152.7035, GNorm = 0.2867, lr_0 = 5.0287e-04
Loss = 1.4603e-02, PNorm = 152.7306, GNorm = 0.1694, lr_0 = 5.0253e-04
Loss = 1.6420e-02, PNorm = 152.7626, GNorm = 0.3718, lr_0 = 5.0218e-04
Loss = 1.2749e-02, PNorm = 152.7964, GNorm = 0.2140, lr_0 = 5.0184e-04
Loss = 1.5878e-02, PNorm = 152.8215, GNorm = 0.3684, lr_0 = 5.0150e-04
Loss = 1.4024e-02, PNorm = 152.8521, GNorm = 0.3193, lr_0 = 5.0115e-04
Loss = 1.4130e-02, PNorm = 152.8774, GNorm = 0.4329, lr_0 = 5.0081e-04
Loss = 1.3419e-02, PNorm = 152.9074, GNorm = 0.2401, lr_0 = 5.0047e-04
Loss = 1.5991e-02, PNorm = 152.9314, GNorm = 0.1633, lr_0 = 5.0012e-04
Loss = 1.2952e-02, PNorm = 152.9604, GNorm = 0.3315, lr_0 = 4.9978e-04
Loss = 1.2545e-02, PNorm = 152.9900, GNorm = 0.2492, lr_0 = 4.9944e-04
Loss = 1.4971e-02, PNorm = 153.0223, GNorm = 0.3541, lr_0 = 4.9910e-04
Loss = 1.2676e-02, PNorm = 153.0439, GNorm = 0.4007, lr_0 = 4.9875e-04
Loss = 1.3338e-02, PNorm = 153.0677, GNorm = 0.3724, lr_0 = 4.9841e-04
Loss = 1.1601e-02, PNorm = 153.0903, GNorm = 0.1132, lr_0 = 4.9807e-04
Loss = 1.3082e-02, PNorm = 153.1171, GNorm = 0.2100, lr_0 = 4.9773e-04
Loss = 1.4473e-02, PNorm = 153.1503, GNorm = 0.1439, lr_0 = 4.9739e-04
Loss = 1.4742e-02, PNorm = 153.1796, GNorm = 0.3087, lr_0 = 4.9705e-04
Loss = 1.2804e-02, PNorm = 153.2066, GNorm = 0.1180, lr_0 = 4.9671e-04
Loss = 1.2875e-02, PNorm = 153.2355, GNorm = 0.3184, lr_0 = 4.9637e-04
Loss = 1.2381e-02, PNorm = 153.2672, GNorm = 0.5595, lr_0 = 4.9603e-04
Loss = 1.3642e-02, PNorm = 153.2973, GNorm = 0.3303, lr_0 = 4.9569e-04
Loss = 1.5855e-02, PNorm = 153.3323, GNorm = 0.6958, lr_0 = 4.9535e-04
Loss = 1.4067e-02, PNorm = 153.3667, GNorm = 0.1512, lr_0 = 4.9501e-04
Loss = 1.3618e-02, PNorm = 153.4032, GNorm = 0.2182, lr_0 = 4.9467e-04
Loss = 1.3845e-02, PNorm = 153.4337, GNorm = 0.4351, lr_0 = 4.9433e-04
Loss = 1.2853e-02, PNorm = 153.4654, GNorm = 0.5956, lr_0 = 4.9399e-04
Loss = 1.1364e-02, PNorm = 153.4929, GNorm = 0.1265, lr_0 = 4.9365e-04
Loss = 1.5141e-02, PNorm = 153.5198, GNorm = 0.4449, lr_0 = 4.9332e-04
Loss = 1.2228e-02, PNorm = 153.5510, GNorm = 0.4970, lr_0 = 4.9298e-04
Loss = 1.4059e-02, PNorm = 153.5782, GNorm = 0.4174, lr_0 = 4.9264e-04
Loss = 1.3185e-02, PNorm = 153.6022, GNorm = 0.2393, lr_0 = 4.9230e-04
Loss = 1.6378e-02, PNorm = 153.6338, GNorm = 0.2067, lr_0 = 4.9197e-04
Loss = 1.4137e-02, PNorm = 153.6684, GNorm = 0.2767, lr_0 = 4.9163e-04
Loss = 1.3616e-02, PNorm = 153.6968, GNorm = 0.5908, lr_0 = 4.9129e-04
Loss = 1.4246e-02, PNorm = 153.7277, GNorm = 0.2802, lr_0 = 4.9095e-04
Loss = 1.6126e-02, PNorm = 153.7589, GNorm = 0.5351, lr_0 = 4.9062e-04
Loss = 1.9171e-02, PNorm = 153.7911, GNorm = 1.0394, lr_0 = 4.9028e-04
Loss = 1.7350e-02, PNorm = 153.8291, GNorm = 0.1608, lr_0 = 4.8995e-04
Loss = 1.6552e-02, PNorm = 153.8631, GNorm = 0.6469, lr_0 = 4.8961e-04
Loss = 1.5100e-02, PNorm = 153.8965, GNorm = 0.3051, lr_0 = 4.8928e-04
Loss = 1.3338e-02, PNorm = 153.9234, GNorm = 0.4095, lr_0 = 4.8894e-04
Loss = 1.6285e-02, PNorm = 153.9540, GNorm = 0.2261, lr_0 = 4.8861e-04
Loss = 1.3653e-02, PNorm = 153.9840, GNorm = 0.3580, lr_0 = 4.8827e-04
Loss = 1.3250e-02, PNorm = 154.0165, GNorm = 0.5597, lr_0 = 4.8794e-04
Loss = 1.4548e-02, PNorm = 154.0495, GNorm = 0.1352, lr_0 = 4.8760e-04
Loss = 1.2764e-02, PNorm = 154.0871, GNorm = 0.2417, lr_0 = 4.8727e-04
Loss = 1.5649e-02, PNorm = 154.1214, GNorm = 0.2898, lr_0 = 4.8693e-04
Loss = 1.5519e-02, PNorm = 154.1541, GNorm = 0.3464, lr_0 = 4.8660e-04
Loss = 1.6557e-02, PNorm = 154.1934, GNorm = 0.4755, lr_0 = 4.8627e-04
Loss = 1.4256e-02, PNorm = 154.2298, GNorm = 0.1484, lr_0 = 4.8593e-04
Loss = 1.3721e-02, PNorm = 154.2621, GNorm = 0.4065, lr_0 = 4.8560e-04
Loss = 1.6593e-02, PNorm = 154.2967, GNorm = 0.3604, lr_0 = 4.8527e-04
Loss = 1.5174e-02, PNorm = 154.3325, GNorm = 0.3138, lr_0 = 4.8494e-04
Loss = 1.4587e-02, PNorm = 154.3647, GNorm = 0.1054, lr_0 = 4.8460e-04
Loss = 1.4471e-02, PNorm = 154.3968, GNorm = 0.1755, lr_0 = 4.8427e-04
Loss = 1.4565e-02, PNorm = 154.4281, GNorm = 0.2328, lr_0 = 4.8394e-04
Loss = 1.4321e-02, PNorm = 154.4636, GNorm = 0.4839, lr_0 = 4.8361e-04
Loss = 1.2542e-02, PNorm = 154.4944, GNorm = 0.1730, lr_0 = 4.8328e-04
Loss = 1.2626e-02, PNorm = 154.5231, GNorm = 0.5459, lr_0 = 4.8295e-04
Loss = 1.4861e-02, PNorm = 154.5517, GNorm = 0.3231, lr_0 = 4.8262e-04
Loss = 1.2781e-02, PNorm = 154.5812, GNorm = 0.3405, lr_0 = 4.8228e-04
Loss = 1.7465e-02, PNorm = 154.6145, GNorm = 0.2282, lr_0 = 4.8195e-04
Loss = 1.2930e-02, PNorm = 154.6448, GNorm = 0.3319, lr_0 = 4.8162e-04
Loss = 1.5343e-02, PNorm = 154.6780, GNorm = 0.2155, lr_0 = 4.8129e-04
Loss = 1.2557e-02, PNorm = 154.7113, GNorm = 0.2335, lr_0 = 4.8096e-04
Loss = 1.2149e-02, PNorm = 154.7430, GNorm = 0.3608, lr_0 = 4.8064e-04
Loss = 1.3339e-02, PNorm = 154.7743, GNorm = 0.1522, lr_0 = 4.8031e-04
Loss = 1.2641e-02, PNorm = 154.8003, GNorm = 0.1619, lr_0 = 4.7998e-04
Loss = 1.1298e-02, PNorm = 154.8314, GNorm = 0.2781, lr_0 = 4.7965e-04
Loss = 1.2019e-02, PNorm = 154.8598, GNorm = 0.1906, lr_0 = 4.7932e-04
Loss = 1.4201e-02, PNorm = 154.8897, GNorm = 0.3836, lr_0 = 4.7899e-04
Loss = 1.7564e-02, PNorm = 154.9219, GNorm = 0.5988, lr_0 = 4.7866e-04
Loss = 1.4438e-02, PNorm = 154.9578, GNorm = 0.3174, lr_0 = 4.7833e-04
Loss = 1.2744e-02, PNorm = 154.9926, GNorm = 0.7608, lr_0 = 4.7801e-04
Loss = 1.4654e-02, PNorm = 155.0205, GNorm = 0.3188, lr_0 = 4.7768e-04
Loss = 1.5971e-02, PNorm = 155.0523, GNorm = 0.2403, lr_0 = 4.7735e-04
Loss = 1.4421e-02, PNorm = 155.0878, GNorm = 0.2683, lr_0 = 4.7703e-04
Validation mae = 0.281976
Epoch 11
Loss = 1.3418e-02, PNorm = 155.1180, GNorm = 0.1708, lr_0 = 4.7670e-04
Loss = 1.1969e-02, PNorm = 155.1420, GNorm = 0.1909, lr_0 = 4.7637e-04
Loss = 1.1469e-02, PNorm = 155.1647, GNorm = 0.1596, lr_0 = 4.7605e-04
Loss = 1.0813e-02, PNorm = 155.1864, GNorm = 0.2236, lr_0 = 4.7572e-04
Loss = 1.1882e-02, PNorm = 155.1998, GNorm = 0.2154, lr_0 = 4.7539e-04
Loss = 1.2619e-02, PNorm = 155.2201, GNorm = 0.1409, lr_0 = 4.7507e-04
Loss = 1.2198e-02, PNorm = 155.2378, GNorm = 0.3020, lr_0 = 4.7474e-04
Loss = 1.2388e-02, PNorm = 155.2608, GNorm = 0.3002, lr_0 = 4.7442e-04
Loss = 1.2164e-02, PNorm = 155.2855, GNorm = 0.1336, lr_0 = 4.7409e-04
Loss = 1.1937e-02, PNorm = 155.3082, GNorm = 0.5425, lr_0 = 4.7377e-04
Loss = 1.3587e-02, PNorm = 155.3276, GNorm = 0.2700, lr_0 = 4.7344e-04
Loss = 1.1232e-02, PNorm = 155.3461, GNorm = 0.4634, lr_0 = 4.7312e-04
Loss = 1.0847e-02, PNorm = 155.3629, GNorm = 0.2054, lr_0 = 4.7279e-04
Loss = 1.6235e-02, PNorm = 155.3862, GNorm = 0.4481, lr_0 = 4.7247e-04
Loss = 1.4456e-02, PNorm = 155.4082, GNorm = 0.3304, lr_0 = 4.7215e-04
Loss = 1.0722e-02, PNorm = 155.4345, GNorm = 0.2586, lr_0 = 4.7182e-04
Loss = 1.1768e-02, PNorm = 155.4586, GNorm = 0.1687, lr_0 = 4.7150e-04
Loss = 1.2700e-02, PNorm = 155.4810, GNorm = 0.2432, lr_0 = 4.7118e-04
Loss = 1.1473e-02, PNorm = 155.4981, GNorm = 0.1446, lr_0 = 4.7085e-04
Loss = 1.1806e-02, PNorm = 155.5261, GNorm = 0.4794, lr_0 = 4.7053e-04
Loss = 1.1281e-02, PNorm = 155.5492, GNorm = 0.3410, lr_0 = 4.7021e-04
Loss = 1.1831e-02, PNorm = 155.5728, GNorm = 0.2687, lr_0 = 4.6989e-04
Loss = 1.2060e-02, PNorm = 155.5901, GNorm = 0.5885, lr_0 = 4.6957e-04
Loss = 1.2288e-02, PNorm = 155.6180, GNorm = 0.3224, lr_0 = 4.6924e-04
Loss = 1.2588e-02, PNorm = 155.6458, GNorm = 0.4592, lr_0 = 4.6892e-04
Loss = 1.1998e-02, PNorm = 155.6766, GNorm = 0.6337, lr_0 = 4.6860e-04
Loss = 1.1512e-02, PNorm = 155.6996, GNorm = 0.4921, lr_0 = 4.6828e-04
Loss = 9.3008e-03, PNorm = 155.7206, GNorm = 0.3008, lr_0 = 4.6796e-04
Loss = 1.3597e-02, PNorm = 155.7438, GNorm = 0.3181, lr_0 = 4.6764e-04
Loss = 1.2850e-02, PNorm = 155.7722, GNorm = 0.4673, lr_0 = 4.6732e-04
Loss = 1.1830e-02, PNorm = 155.8002, GNorm = 0.1872, lr_0 = 4.6700e-04
Loss = 1.1133e-02, PNorm = 155.8295, GNorm = 0.2208, lr_0 = 4.6668e-04
Loss = 1.1787e-02, PNorm = 155.8530, GNorm = 0.4728, lr_0 = 4.6636e-04
Loss = 1.1083e-02, PNorm = 155.8781, GNorm = 0.2141, lr_0 = 4.6604e-04
Loss = 1.3332e-02, PNorm = 155.9055, GNorm = 0.2221, lr_0 = 4.6572e-04
Loss = 1.3366e-02, PNorm = 155.9377, GNorm = 0.2905, lr_0 = 4.6540e-04
Loss = 1.2905e-02, PNorm = 155.9630, GNorm = 0.1621, lr_0 = 4.6508e-04
Loss = 1.1670e-02, PNorm = 155.9909, GNorm = 0.6520, lr_0 = 4.6476e-04
Loss = 1.1311e-02, PNorm = 156.0164, GNorm = 0.1157, lr_0 = 4.6445e-04
Loss = 1.1283e-02, PNorm = 156.0393, GNorm = 0.1752, lr_0 = 4.6413e-04
Loss = 1.1972e-02, PNorm = 156.0627, GNorm = 0.1364, lr_0 = 4.6381e-04
Loss = 1.1642e-02, PNorm = 156.0855, GNorm = 0.2047, lr_0 = 4.6349e-04
Loss = 1.3762e-02, PNorm = 156.1107, GNorm = 1.3877, lr_0 = 4.6317e-04
Loss = 1.1610e-02, PNorm = 156.1321, GNorm = 0.6512, lr_0 = 4.6286e-04
Loss = 1.3196e-02, PNorm = 156.1566, GNorm = 0.4024, lr_0 = 4.6254e-04
Loss = 1.0668e-02, PNorm = 156.1832, GNorm = 0.1932, lr_0 = 4.6222e-04
Loss = 1.1941e-02, PNorm = 156.2068, GNorm = 0.4658, lr_0 = 4.6191e-04
Loss = 1.1459e-02, PNorm = 156.2280, GNorm = 0.2983, lr_0 = 4.6159e-04
Loss = 1.3291e-02, PNorm = 156.2509, GNorm = 0.2155, lr_0 = 4.6127e-04
Loss = 1.1140e-02, PNorm = 156.2842, GNorm = 0.2945, lr_0 = 4.6096e-04
Loss = 1.4177e-02, PNorm = 156.3105, GNorm = 0.1862, lr_0 = 4.6064e-04
Loss = 1.1959e-02, PNorm = 156.3409, GNorm = 0.3586, lr_0 = 4.6033e-04
Loss = 1.0242e-02, PNorm = 156.3666, GNorm = 0.1816, lr_0 = 4.6001e-04
Loss = 1.0688e-02, PNorm = 156.3958, GNorm = 0.1205, lr_0 = 4.5970e-04
Loss = 1.1225e-02, PNorm = 156.4233, GNorm = 0.3011, lr_0 = 4.5938e-04
Loss = 1.0316e-02, PNorm = 156.4504, GNorm = 0.2053, lr_0 = 4.5907e-04
Loss = 1.0539e-02, PNorm = 156.4744, GNorm = 0.2692, lr_0 = 4.5875e-04
Loss = 1.0711e-02, PNorm = 156.5032, GNorm = 0.5170, lr_0 = 4.5844e-04
Loss = 1.4018e-02, PNorm = 156.5279, GNorm = 0.4227, lr_0 = 4.5812e-04
Loss = 1.2240e-02, PNorm = 156.5538, GNorm = 0.2890, lr_0 = 4.5781e-04
Loss = 9.5662e-03, PNorm = 156.5830, GNorm = 0.1492, lr_0 = 4.5750e-04
Loss = 1.2763e-02, PNorm = 156.6134, GNorm = 0.2172, lr_0 = 4.5718e-04
Loss = 1.2978e-02, PNorm = 156.6408, GNorm = 0.6074, lr_0 = 4.5687e-04
Loss = 1.1614e-02, PNorm = 156.6733, GNorm = 0.2361, lr_0 = 4.5656e-04
Loss = 9.5296e-03, PNorm = 156.7005, GNorm = 0.1861, lr_0 = 4.5624e-04
Loss = 1.1330e-02, PNorm = 156.7260, GNorm = 0.1231, lr_0 = 4.5593e-04
Loss = 1.3328e-02, PNorm = 156.7529, GNorm = 0.2532, lr_0 = 4.5562e-04
Loss = 1.3522e-02, PNorm = 156.7823, GNorm = 0.4442, lr_0 = 4.5531e-04
Loss = 1.0629e-02, PNorm = 156.8170, GNorm = 0.1111, lr_0 = 4.5499e-04
Loss = 1.0058e-02, PNorm = 156.8390, GNorm = 0.2922, lr_0 = 4.5468e-04
Loss = 1.2028e-02, PNorm = 156.8708, GNorm = 0.1661, lr_0 = 4.5437e-04
Loss = 1.0654e-02, PNorm = 156.8988, GNorm = 0.2674, lr_0 = 4.5406e-04
Loss = 1.0790e-02, PNorm = 156.9249, GNorm = 0.2992, lr_0 = 4.5375e-04
Loss = 1.1341e-02, PNorm = 156.9441, GNorm = 0.5692, lr_0 = 4.5344e-04
Loss = 1.1530e-02, PNorm = 156.9667, GNorm = 0.1070, lr_0 = 4.5313e-04
Loss = 1.1133e-02, PNorm = 156.9949, GNorm = 0.2145, lr_0 = 4.5282e-04
Loss = 1.2408e-02, PNorm = 157.0175, GNorm = 0.2067, lr_0 = 4.5251e-04
Loss = 1.2921e-02, PNorm = 157.0428, GNorm = 0.2814, lr_0 = 4.5220e-04
Loss = 9.9002e-03, PNorm = 157.0707, GNorm = 0.5165, lr_0 = 4.5189e-04
Loss = 1.1010e-02, PNorm = 157.0999, GNorm = 0.1376, lr_0 = 4.5158e-04
Loss = 1.3149e-02, PNorm = 157.1258, GNorm = 0.2066, lr_0 = 4.5127e-04
Loss = 1.1331e-02, PNorm = 157.1553, GNorm = 0.1934, lr_0 = 4.5096e-04
Loss = 1.1178e-02, PNorm = 157.1849, GNorm = 0.2994, lr_0 = 4.5065e-04
Loss = 1.1220e-02, PNorm = 157.2102, GNorm = 0.4020, lr_0 = 4.5034e-04
Loss = 1.0525e-02, PNorm = 157.2390, GNorm = 0.2517, lr_0 = 4.5003e-04
Loss = 1.0324e-02, PNorm = 157.2642, GNorm = 0.1332, lr_0 = 4.4972e-04
Loss = 1.2631e-02, PNorm = 157.2914, GNorm = 0.2129, lr_0 = 4.4942e-04
Loss = 9.2104e-03, PNorm = 157.3160, GNorm = 0.3670, lr_0 = 4.4911e-04
Loss = 1.0850e-02, PNorm = 157.3397, GNorm = 0.1928, lr_0 = 4.4880e-04
Loss = 1.3972e-02, PNorm = 157.3639, GNorm = 0.2071, lr_0 = 4.4849e-04
Loss = 1.0149e-02, PNorm = 157.3918, GNorm = 0.4424, lr_0 = 4.4819e-04
Loss = 1.0594e-02, PNorm = 157.4171, GNorm = 0.3580, lr_0 = 4.4788e-04
Loss = 1.2830e-02, PNorm = 157.4452, GNorm = 0.1339, lr_0 = 4.4757e-04
Loss = 1.0685e-02, PNorm = 157.4720, GNorm = 0.3210, lr_0 = 4.4727e-04
Loss = 1.1475e-02, PNorm = 157.4996, GNorm = 0.6322, lr_0 = 4.4696e-04
Loss = 1.3387e-02, PNorm = 157.5303, GNorm = 0.5552, lr_0 = 4.4665e-04
Loss = 1.4094e-02, PNorm = 157.5582, GNorm = 0.1446, lr_0 = 4.4635e-04
Loss = 1.4511e-02, PNorm = 157.5904, GNorm = 0.6198, lr_0 = 4.4604e-04
Loss = 1.0331e-02, PNorm = 157.6232, GNorm = 0.1876, lr_0 = 4.4574e-04
Loss = 1.0672e-02, PNorm = 157.6492, GNorm = 0.1244, lr_0 = 4.4543e-04
Loss = 1.0275e-02, PNorm = 157.6819, GNorm = 0.4629, lr_0 = 4.4513e-04
Loss = 1.0740e-02, PNorm = 157.7115, GNorm = 0.3193, lr_0 = 4.4482e-04
Loss = 1.1636e-02, PNorm = 157.7405, GNorm = 0.1889, lr_0 = 4.4452e-04
Loss = 1.4663e-02, PNorm = 157.7668, GNorm = 0.1454, lr_0 = 4.4421e-04
Loss = 1.5270e-02, PNorm = 157.7988, GNorm = 0.1802, lr_0 = 4.4391e-04
Loss = 1.1107e-02, PNorm = 157.8271, GNorm = 0.1839, lr_0 = 4.4360e-04
Loss = 1.1403e-02, PNorm = 157.8540, GNorm = 0.1439, lr_0 = 4.4330e-04
Loss = 1.3953e-02, PNorm = 157.8860, GNorm = 0.1501, lr_0 = 4.4299e-04
Loss = 1.0254e-02, PNorm = 157.9163, GNorm = 0.3252, lr_0 = 4.4269e-04
Loss = 1.1886e-02, PNorm = 157.9455, GNorm = 0.2526, lr_0 = 4.4239e-04
Loss = 1.2589e-02, PNorm = 157.9740, GNorm = 0.4129, lr_0 = 4.4209e-04
Loss = 1.3439e-02, PNorm = 158.0042, GNorm = 0.3554, lr_0 = 4.4178e-04
Loss = 1.2252e-02, PNorm = 158.0345, GNorm = 0.4135, lr_0 = 4.4148e-04
Loss = 1.4104e-02, PNorm = 158.0618, GNorm = 0.2071, lr_0 = 4.4118e-04
Loss = 1.2929e-02, PNorm = 158.0903, GNorm = 0.2335, lr_0 = 4.4088e-04
Loss = 1.2564e-02, PNorm = 158.1159, GNorm = 0.2286, lr_0 = 4.4057e-04
Loss = 1.7268e-02, PNorm = 158.1422, GNorm = 0.3230, lr_0 = 4.4027e-04
Loss = 1.1834e-02, PNorm = 158.1751, GNorm = 0.5672, lr_0 = 4.3997e-04
Loss = 1.1437e-02, PNorm = 158.2075, GNorm = 0.4261, lr_0 = 4.3967e-04
Loss = 1.0418e-02, PNorm = 158.2411, GNorm = 0.2044, lr_0 = 4.3937e-04
Validation mae = 0.280045
Epoch 12
Loss = 1.1559e-02, PNorm = 158.2637, GNorm = 0.3063, lr_0 = 4.3907e-04
Loss = 1.3084e-02, PNorm = 158.2793, GNorm = 0.3499, lr_0 = 4.3877e-04
Loss = 1.0839e-02, PNorm = 158.3001, GNorm = 0.2643, lr_0 = 4.3846e-04
Loss = 1.0584e-02, PNorm = 158.3187, GNorm = 0.3405, lr_0 = 4.3816e-04
Loss = 1.1542e-02, PNorm = 158.3367, GNorm = 0.3821, lr_0 = 4.3786e-04
Loss = 1.1017e-02, PNorm = 158.3590, GNorm = 0.1921, lr_0 = 4.3756e-04
Loss = 9.5860e-03, PNorm = 158.3794, GNorm = 0.3399, lr_0 = 4.3726e-04
Loss = 1.0331e-02, PNorm = 158.3977, GNorm = 0.3092, lr_0 = 4.3696e-04
Loss = 1.2819e-02, PNorm = 158.4170, GNorm = 0.2348, lr_0 = 4.3667e-04
Loss = 1.0580e-02, PNorm = 158.4369, GNorm = 0.1839, lr_0 = 4.3637e-04
Loss = 1.0270e-02, PNorm = 158.4606, GNorm = 0.3483, lr_0 = 4.3607e-04
Loss = 1.0067e-02, PNorm = 158.4815, GNorm = 0.3632, lr_0 = 4.3577e-04
Loss = 9.4298e-03, PNorm = 158.5047, GNorm = 0.3173, lr_0 = 4.3547e-04
Loss = 1.0382e-02, PNorm = 158.5237, GNorm = 0.2673, lr_0 = 4.3517e-04
Loss = 1.1811e-02, PNorm = 158.5444, GNorm = 0.3772, lr_0 = 4.3487e-04
Loss = 1.2194e-02, PNorm = 158.5626, GNorm = 0.3287, lr_0 = 4.3458e-04
Loss = 1.2767e-02, PNorm = 158.5821, GNorm = 0.2423, lr_0 = 4.3428e-04
Loss = 9.7727e-03, PNorm = 158.6029, GNorm = 0.2003, lr_0 = 4.3398e-04
Loss = 1.2420e-02, PNorm = 158.6241, GNorm = 0.3045, lr_0 = 4.3368e-04
Loss = 9.7259e-03, PNorm = 158.6464, GNorm = 0.2199, lr_0 = 4.3339e-04
Loss = 9.9137e-03, PNorm = 158.6681, GNorm = 0.1894, lr_0 = 4.3309e-04
Loss = 1.0437e-02, PNorm = 158.6850, GNorm = 0.4806, lr_0 = 4.3279e-04
Loss = 9.4054e-03, PNorm = 158.7022, GNorm = 0.2396, lr_0 = 4.3250e-04
Loss = 9.6405e-03, PNorm = 158.7241, GNorm = 0.2468, lr_0 = 4.3220e-04
Loss = 1.1046e-02, PNorm = 158.7461, GNorm = 0.4165, lr_0 = 4.3190e-04
Loss = 9.5441e-03, PNorm = 158.7658, GNorm = 0.2625, lr_0 = 4.3161e-04
Loss = 8.6498e-03, PNorm = 158.7871, GNorm = 0.3137, lr_0 = 4.3131e-04
Loss = 1.0228e-02, PNorm = 158.8038, GNorm = 0.4737, lr_0 = 4.3102e-04
Loss = 8.9821e-03, PNorm = 158.8242, GNorm = 0.4169, lr_0 = 4.3072e-04
Loss = 1.3662e-02, PNorm = 158.8401, GNorm = 0.2966, lr_0 = 4.3043e-04
Loss = 1.0077e-02, PNorm = 158.8596, GNorm = 0.3893, lr_0 = 4.3013e-04
Loss = 9.2006e-03, PNorm = 158.8796, GNorm = 0.1683, lr_0 = 4.2984e-04
Loss = 1.2085e-02, PNorm = 158.9019, GNorm = 0.3561, lr_0 = 4.2954e-04
Loss = 1.1887e-02, PNorm = 158.9216, GNorm = 0.3374, lr_0 = 4.2925e-04
Loss = 9.4586e-03, PNorm = 158.9440, GNorm = 0.2816, lr_0 = 4.2895e-04
Loss = 1.0426e-02, PNorm = 158.9664, GNorm = 0.2420, lr_0 = 4.2866e-04
Loss = 1.0263e-02, PNorm = 158.9893, GNorm = 0.2639, lr_0 = 4.2837e-04
Loss = 7.8335e-03, PNorm = 159.0120, GNorm = 0.3270, lr_0 = 4.2807e-04
Loss = 9.0869e-03, PNorm = 159.0317, GNorm = 0.2715, lr_0 = 4.2778e-04
Loss = 9.0892e-03, PNorm = 159.0512, GNorm = 0.1393, lr_0 = 4.2749e-04
Loss = 7.8961e-03, PNorm = 159.0695, GNorm = 0.1240, lr_0 = 4.2719e-04
Loss = 9.9391e-03, PNorm = 159.0856, GNorm = 0.3986, lr_0 = 4.2690e-04
Loss = 1.1266e-02, PNorm = 159.1068, GNorm = 0.2748, lr_0 = 4.2661e-04
Loss = 1.1197e-02, PNorm = 159.1299, GNorm = 0.0916, lr_0 = 4.2632e-04
Loss = 1.1729e-02, PNorm = 159.1548, GNorm = 0.1270, lr_0 = 4.2602e-04
Loss = 1.0167e-02, PNorm = 159.1809, GNorm = 0.5215, lr_0 = 4.2573e-04
Loss = 9.9874e-03, PNorm = 159.2071, GNorm = 0.2917, lr_0 = 4.2544e-04
Loss = 8.7580e-03, PNorm = 159.2361, GNorm = 0.3016, lr_0 = 4.2515e-04
Loss = 1.0779e-02, PNorm = 159.2559, GNorm = 0.5482, lr_0 = 4.2486e-04
Loss = 1.0687e-02, PNorm = 159.2773, GNorm = 0.4026, lr_0 = 4.2457e-04
Loss = 9.1976e-03, PNorm = 159.2990, GNorm = 0.2647, lr_0 = 4.2428e-04
Loss = 1.0654e-02, PNorm = 159.3222, GNorm = 0.3000, lr_0 = 4.2399e-04
Loss = 9.7209e-03, PNorm = 159.3439, GNorm = 0.4049, lr_0 = 4.2370e-04
Loss = 1.2995e-02, PNorm = 159.3696, GNorm = 0.1207, lr_0 = 4.2340e-04
Loss = 9.6801e-03, PNorm = 159.3946, GNorm = 0.3443, lr_0 = 4.2311e-04
Loss = 1.0837e-02, PNorm = 159.4159, GNorm = 0.5736, lr_0 = 4.2283e-04
Loss = 9.4483e-03, PNorm = 159.4392, GNorm = 0.3477, lr_0 = 4.2254e-04
Loss = 1.1187e-02, PNorm = 159.4594, GNorm = 0.6692, lr_0 = 4.2225e-04
Loss = 9.5145e-03, PNorm = 159.4811, GNorm = 0.4104, lr_0 = 4.2196e-04
Loss = 8.8074e-03, PNorm = 159.5008, GNorm = 0.1293, lr_0 = 4.2167e-04
Loss = 9.8907e-03, PNorm = 159.5241, GNorm = 0.1197, lr_0 = 4.2138e-04
Loss = 1.1707e-02, PNorm = 159.5523, GNorm = 0.2253, lr_0 = 4.2109e-04
Loss = 1.1287e-02, PNorm = 159.5760, GNorm = 0.5011, lr_0 = 4.2080e-04
Loss = 9.8177e-03, PNorm = 159.5993, GNorm = 0.4945, lr_0 = 4.2051e-04
Loss = 1.0562e-02, PNorm = 159.6200, GNorm = 0.3351, lr_0 = 4.2023e-04
Loss = 1.0639e-02, PNorm = 159.6410, GNorm = 0.1496, lr_0 = 4.1994e-04
Loss = 9.3486e-03, PNorm = 159.6649, GNorm = 0.3920, lr_0 = 4.1965e-04
Loss = 1.1115e-02, PNorm = 159.6844, GNorm = 0.3658, lr_0 = 4.1936e-04
Loss = 1.1458e-02, PNorm = 159.7110, GNorm = 0.2214, lr_0 = 4.1907e-04
Loss = 1.3068e-02, PNorm = 159.7278, GNorm = 0.7239, lr_0 = 4.1879e-04
Loss = 1.0954e-02, PNorm = 159.7491, GNorm = 0.4248, lr_0 = 4.1850e-04
Loss = 1.0697e-02, PNorm = 159.7695, GNorm = 0.0816, lr_0 = 4.1821e-04
Loss = 9.4234e-03, PNorm = 159.7896, GNorm = 0.4545, lr_0 = 4.1793e-04
Loss = 1.1949e-02, PNorm = 159.8135, GNorm = 0.1487, lr_0 = 4.1764e-04
Loss = 1.1231e-02, PNorm = 159.8371, GNorm = 0.2143, lr_0 = 4.1736e-04
Loss = 8.9529e-03, PNorm = 159.8585, GNorm = 0.1440, lr_0 = 4.1707e-04
Loss = 1.3684e-02, PNorm = 159.8802, GNorm = 0.3327, lr_0 = 4.1678e-04
Loss = 1.0941e-02, PNorm = 159.9031, GNorm = 0.2952, lr_0 = 4.1650e-04
Loss = 1.1394e-02, PNorm = 159.9260, GNorm = 0.3245, lr_0 = 4.1621e-04
Loss = 1.0184e-02, PNorm = 159.9490, GNorm = 0.1793, lr_0 = 4.1593e-04
Loss = 1.1234e-02, PNorm = 159.9734, GNorm = 0.3252, lr_0 = 4.1564e-04
Loss = 1.0062e-02, PNorm = 159.9975, GNorm = 0.2991, lr_0 = 4.1536e-04
Loss = 1.2641e-02, PNorm = 160.0227, GNorm = 0.2532, lr_0 = 4.1507e-04
Loss = 1.0746e-02, PNorm = 160.0464, GNorm = 0.4675, lr_0 = 4.1479e-04
Loss = 1.0101e-02, PNorm = 160.0737, GNorm = 0.1591, lr_0 = 4.1450e-04
Loss = 1.0507e-02, PNorm = 160.0990, GNorm = 0.2302, lr_0 = 4.1422e-04
Loss = 9.8959e-03, PNorm = 160.1218, GNorm = 0.1359, lr_0 = 4.1394e-04
Loss = 1.1336e-02, PNorm = 160.1427, GNorm = 0.1806, lr_0 = 4.1365e-04
Loss = 1.0109e-02, PNorm = 160.1630, GNorm = 0.3484, lr_0 = 4.1337e-04
Loss = 9.2877e-03, PNorm = 160.1867, GNorm = 0.1650, lr_0 = 4.1309e-04
Loss = 1.0433e-02, PNorm = 160.2111, GNorm = 0.3605, lr_0 = 4.1280e-04
Loss = 9.9445e-03, PNorm = 160.2361, GNorm = 0.2215, lr_0 = 4.1252e-04
Loss = 9.2444e-03, PNorm = 160.2579, GNorm = 0.4676, lr_0 = 4.1224e-04
Loss = 9.4611e-03, PNorm = 160.2779, GNorm = 0.1883, lr_0 = 4.1196e-04
Loss = 1.0125e-02, PNorm = 160.2996, GNorm = 0.3241, lr_0 = 4.1167e-04
Loss = 1.0128e-02, PNorm = 160.3253, GNorm = 0.2655, lr_0 = 4.1139e-04
Loss = 9.5945e-03, PNorm = 160.3493, GNorm = 0.1077, lr_0 = 4.1111e-04
Loss = 1.1034e-02, PNorm = 160.3744, GNorm = 0.1656, lr_0 = 4.1083e-04
Loss = 1.1893e-02, PNorm = 160.3988, GNorm = 0.1744, lr_0 = 4.1055e-04
Loss = 7.4339e-03, PNorm = 160.4210, GNorm = 0.3311, lr_0 = 4.1027e-04
Loss = 1.1854e-02, PNorm = 160.4431, GNorm = 0.3405, lr_0 = 4.0998e-04
Loss = 1.0376e-02, PNorm = 160.4689, GNorm = 0.1417, lr_0 = 4.0970e-04
Loss = 1.2668e-02, PNorm = 160.4940, GNorm = 0.2268, lr_0 = 4.0942e-04
Loss = 8.6782e-03, PNorm = 160.5157, GNorm = 0.1964, lr_0 = 4.0914e-04
Loss = 8.9787e-03, PNorm = 160.5384, GNorm = 0.3443, lr_0 = 4.0886e-04
Loss = 1.0381e-02, PNorm = 160.5632, GNorm = 0.2182, lr_0 = 4.0858e-04
Loss = 9.8918e-03, PNorm = 160.5880, GNorm = 0.1881, lr_0 = 4.0830e-04
Loss = 1.0851e-02, PNorm = 160.6074, GNorm = 0.1448, lr_0 = 4.0802e-04
Loss = 1.1263e-02, PNorm = 160.6273, GNorm = 0.3386, lr_0 = 4.0774e-04
Loss = 1.2011e-02, PNorm = 160.6554, GNorm = 0.2312, lr_0 = 4.0746e-04
Loss = 8.3160e-03, PNorm = 160.6817, GNorm = 0.6101, lr_0 = 4.0718e-04
Loss = 1.1533e-02, PNorm = 160.7052, GNorm = 0.0884, lr_0 = 4.0691e-04
Loss = 9.5449e-03, PNorm = 160.7291, GNorm = 0.2461, lr_0 = 4.0663e-04
Loss = 1.2358e-02, PNorm = 160.7527, GNorm = 0.1632, lr_0 = 4.0635e-04
Loss = 9.9297e-03, PNorm = 160.7780, GNorm = 0.2632, lr_0 = 4.0607e-04
Loss = 1.0156e-02, PNorm = 160.8012, GNorm = 0.4196, lr_0 = 4.0579e-04
Loss = 9.2219e-03, PNorm = 160.8214, GNorm = 0.1068, lr_0 = 4.0551e-04
Loss = 1.1946e-02, PNorm = 160.8433, GNorm = 0.2963, lr_0 = 4.0524e-04
Loss = 9.8446e-03, PNorm = 160.8702, GNorm = 0.4185, lr_0 = 4.0496e-04
Loss = 1.0372e-02, PNorm = 160.8949, GNorm = 0.2787, lr_0 = 4.0468e-04
Validation mae = 0.281359
Epoch 13
Loss = 8.7052e-03, PNorm = 160.9093, GNorm = 0.1205, lr_0 = 4.0440e-04
Loss = 1.4816e-02, PNorm = 160.9225, GNorm = 0.7895, lr_0 = 4.0413e-04
Loss = 9.3715e-03, PNorm = 160.9368, GNorm = 0.3400, lr_0 = 4.0385e-04
Loss = 8.3164e-03, PNorm = 160.9513, GNorm = 0.3388, lr_0 = 4.0357e-04
Loss = 7.5384e-03, PNorm = 160.9657, GNorm = 0.3475, lr_0 = 4.0330e-04
Loss = 8.1230e-03, PNorm = 160.9813, GNorm = 0.0942, lr_0 = 4.0302e-04
Loss = 1.0406e-02, PNorm = 161.0018, GNorm = 0.3938, lr_0 = 4.0274e-04
Loss = 8.0463e-03, PNorm = 161.0232, GNorm = 0.5318, lr_0 = 4.0247e-04
Loss = 8.4886e-03, PNorm = 161.0405, GNorm = 0.1914, lr_0 = 4.0219e-04
Loss = 8.9970e-03, PNorm = 161.0564, GNorm = 0.2674, lr_0 = 4.0192e-04
Loss = 9.3068e-03, PNorm = 161.0711, GNorm = 0.2045, lr_0 = 4.0164e-04
Loss = 8.0222e-03, PNorm = 161.0886, GNorm = 0.1741, lr_0 = 4.0137e-04
Loss = 8.8195e-03, PNorm = 161.1056, GNorm = 0.3990, lr_0 = 4.0109e-04
Loss = 9.1287e-03, PNorm = 161.1182, GNorm = 0.3377, lr_0 = 4.0082e-04
Loss = 8.0961e-03, PNorm = 161.1325, GNorm = 0.2096, lr_0 = 4.0054e-04
Loss = 8.4390e-03, PNorm = 161.1484, GNorm = 0.2714, lr_0 = 4.0027e-04
Loss = 6.8808e-03, PNorm = 161.1648, GNorm = 0.3650, lr_0 = 3.9999e-04
Loss = 8.3469e-03, PNorm = 161.1817, GNorm = 0.3304, lr_0 = 3.9972e-04
Loss = 9.3594e-03, PNorm = 161.1974, GNorm = 0.3552, lr_0 = 3.9945e-04
Loss = 9.3698e-03, PNorm = 161.2148, GNorm = 0.3985, lr_0 = 3.9917e-04
Loss = 9.8648e-03, PNorm = 161.2360, GNorm = 0.2450, lr_0 = 3.9890e-04
Loss = 1.4208e-02, PNorm = 161.2560, GNorm = 0.3531, lr_0 = 3.9863e-04
Loss = 8.4486e-03, PNorm = 161.2743, GNorm = 0.2578, lr_0 = 3.9835e-04
Loss = 9.4932e-03, PNorm = 161.2952, GNorm = 0.3364, lr_0 = 3.9808e-04
Loss = 7.9107e-03, PNorm = 161.3140, GNorm = 0.3307, lr_0 = 3.9781e-04
Loss = 1.0696e-02, PNorm = 161.3321, GNorm = 0.4802, lr_0 = 3.9753e-04
Loss = 7.3034e-03, PNorm = 161.3475, GNorm = 0.1955, lr_0 = 3.9726e-04
Loss = 8.9370e-03, PNorm = 161.3617, GNorm = 0.3053, lr_0 = 3.9699e-04
Loss = 7.8261e-03, PNorm = 161.3781, GNorm = 0.1910, lr_0 = 3.9672e-04
Loss = 7.4781e-03, PNorm = 161.3914, GNorm = 0.2747, lr_0 = 3.9645e-04
Loss = 6.7504e-03, PNorm = 161.4057, GNorm = 0.4086, lr_0 = 3.9617e-04
Loss = 8.0664e-03, PNorm = 161.4200, GNorm = 0.2985, lr_0 = 3.9590e-04
Loss = 9.0855e-03, PNorm = 161.4344, GNorm = 0.1999, lr_0 = 3.9563e-04
Loss = 7.2459e-03, PNorm = 161.4509, GNorm = 0.4753, lr_0 = 3.9536e-04
Loss = 8.3064e-03, PNorm = 161.4703, GNorm = 0.1758, lr_0 = 3.9509e-04
Loss = 8.9247e-03, PNorm = 161.4882, GNorm = 0.5887, lr_0 = 3.9482e-04
Loss = 7.5215e-03, PNorm = 161.5026, GNorm = 0.3460, lr_0 = 3.9455e-04
Loss = 7.7456e-03, PNorm = 161.5160, GNorm = 0.2407, lr_0 = 3.9428e-04
Loss = 8.4886e-03, PNorm = 161.5337, GNorm = 0.2748, lr_0 = 3.9401e-04
Loss = 8.3294e-03, PNorm = 161.5494, GNorm = 0.3489, lr_0 = 3.9374e-04
Loss = 8.3095e-03, PNorm = 161.5641, GNorm = 0.0977, lr_0 = 3.9347e-04
Loss = 5.9730e-03, PNorm = 161.5798, GNorm = 0.0795, lr_0 = 3.9320e-04
Loss = 7.0074e-03, PNorm = 161.5948, GNorm = 0.5259, lr_0 = 3.9293e-04
Loss = 9.3102e-03, PNorm = 161.6118, GNorm = 0.2445, lr_0 = 3.9266e-04
Loss = 8.0763e-03, PNorm = 161.6298, GNorm = 0.1764, lr_0 = 3.9239e-04
Loss = 8.2107e-03, PNorm = 161.6477, GNorm = 0.1652, lr_0 = 3.9212e-04
Loss = 9.4682e-03, PNorm = 161.6646, GNorm = 0.3220, lr_0 = 3.9185e-04
Loss = 1.1059e-02, PNorm = 161.6822, GNorm = 0.3487, lr_0 = 3.9159e-04
Loss = 9.6911e-03, PNorm = 161.7026, GNorm = 0.1824, lr_0 = 3.9132e-04
Loss = 8.4164e-03, PNorm = 161.7197, GNorm = 0.2635, lr_0 = 3.9105e-04
Loss = 6.7306e-03, PNorm = 161.7389, GNorm = 0.5428, lr_0 = 3.9078e-04
Loss = 1.1636e-02, PNorm = 161.7557, GNorm = 0.3538, lr_0 = 3.9051e-04
Loss = 7.4193e-03, PNorm = 161.7744, GNorm = 0.2851, lr_0 = 3.9025e-04
Loss = 8.8097e-03, PNorm = 161.7953, GNorm = 0.1892, lr_0 = 3.8998e-04
Loss = 1.0436e-02, PNorm = 161.8166, GNorm = 0.2363, lr_0 = 3.8971e-04
Loss = 7.6553e-03, PNorm = 161.8367, GNorm = 0.2060, lr_0 = 3.8945e-04
Loss = 8.6420e-03, PNorm = 161.8544, GNorm = 0.1699, lr_0 = 3.8918e-04
Loss = 9.0542e-03, PNorm = 161.8764, GNorm = 0.0722, lr_0 = 3.8891e-04
Loss = 8.4886e-03, PNorm = 161.8950, GNorm = 0.2211, lr_0 = 3.8865e-04
Loss = 7.9143e-03, PNorm = 161.9198, GNorm = 0.1414, lr_0 = 3.8838e-04
Loss = 9.9339e-03, PNorm = 161.9419, GNorm = 0.1764, lr_0 = 3.8811e-04
Loss = 7.1396e-03, PNorm = 161.9637, GNorm = 0.2275, lr_0 = 3.8785e-04
Loss = 9.8911e-03, PNorm = 161.9863, GNorm = 0.3341, lr_0 = 3.8758e-04
Loss = 8.0125e-03, PNorm = 162.0068, GNorm = 0.2444, lr_0 = 3.8732e-04
Loss = 8.3234e-03, PNorm = 162.0258, GNorm = 0.2458, lr_0 = 3.8705e-04
Loss = 7.8797e-03, PNorm = 162.0429, GNorm = 0.1642, lr_0 = 3.8679e-04
Loss = 8.0229e-03, PNorm = 162.0585, GNorm = 0.1056, lr_0 = 3.8652e-04
Loss = 8.3223e-03, PNorm = 162.0768, GNorm = 0.3739, lr_0 = 3.8626e-04
Loss = 7.9393e-03, PNorm = 162.0953, GNorm = 0.1428, lr_0 = 3.8599e-04
Loss = 6.4967e-03, PNorm = 162.1178, GNorm = 0.2470, lr_0 = 3.8573e-04
Loss = 8.0998e-03, PNorm = 162.1374, GNorm = 0.1456, lr_0 = 3.8546e-04
Loss = 6.7839e-03, PNorm = 162.1542, GNorm = 0.1388, lr_0 = 3.8520e-04
Loss = 8.2467e-03, PNorm = 162.1721, GNorm = 0.7480, lr_0 = 3.8493e-04
Loss = 7.7132e-03, PNorm = 162.1890, GNorm = 0.3663, lr_0 = 3.8467e-04
Loss = 9.3587e-03, PNorm = 162.2076, GNorm = 0.3953, lr_0 = 3.8441e-04
Loss = 9.2705e-03, PNorm = 162.2286, GNorm = 0.1122, lr_0 = 3.8414e-04
Loss = 7.3085e-03, PNorm = 162.2513, GNorm = 0.3312, lr_0 = 3.8388e-04
Loss = 9.5231e-03, PNorm = 162.2699, GNorm = 0.1960, lr_0 = 3.8362e-04
Loss = 7.2505e-03, PNorm = 162.2880, GNorm = 0.1307, lr_0 = 3.8336e-04
Loss = 8.7752e-03, PNorm = 162.3041, GNorm = 0.3882, lr_0 = 3.8309e-04
Loss = 7.7061e-03, PNorm = 162.3231, GNorm = 0.2039, lr_0 = 3.8283e-04
Loss = 8.1525e-03, PNorm = 162.3422, GNorm = 0.3173, lr_0 = 3.8257e-04
Loss = 9.5185e-03, PNorm = 162.3633, GNorm = 0.3484, lr_0 = 3.8231e-04
Loss = 7.5831e-03, PNorm = 162.3826, GNorm = 0.2879, lr_0 = 3.8204e-04
Loss = 7.7093e-03, PNorm = 162.4012, GNorm = 0.1955, lr_0 = 3.8178e-04
Loss = 7.0920e-03, PNorm = 162.4187, GNorm = 0.1610, lr_0 = 3.8152e-04
Loss = 9.3945e-03, PNorm = 162.4347, GNorm = 0.3597, lr_0 = 3.8126e-04
Loss = 7.6816e-03, PNorm = 162.4578, GNorm = 0.2474, lr_0 = 3.8100e-04
Loss = 8.5331e-03, PNorm = 162.4825, GNorm = 0.2874, lr_0 = 3.8074e-04
Loss = 7.6851e-03, PNorm = 162.5015, GNorm = 0.2647, lr_0 = 3.8048e-04
Loss = 7.1054e-03, PNorm = 162.5199, GNorm = 0.1434, lr_0 = 3.8022e-04
Loss = 9.7855e-03, PNorm = 162.5375, GNorm = 0.3897, lr_0 = 3.7995e-04
Loss = 7.0554e-03, PNorm = 162.5579, GNorm = 0.1522, lr_0 = 3.7969e-04
Loss = 6.6637e-03, PNorm = 162.5751, GNorm = 0.1796, lr_0 = 3.7943e-04
Loss = 8.7642e-03, PNorm = 162.5921, GNorm = 0.3582, lr_0 = 3.7917e-04
Loss = 9.7367e-03, PNorm = 162.6108, GNorm = 0.1583, lr_0 = 3.7891e-04
Loss = 1.0450e-02, PNorm = 162.6340, GNorm = 0.1868, lr_0 = 3.7866e-04
Loss = 9.5738e-03, PNorm = 162.6560, GNorm = 0.5757, lr_0 = 3.7840e-04
Loss = 1.1107e-02, PNorm = 162.6791, GNorm = 0.2004, lr_0 = 3.7814e-04
Loss = 9.3489e-03, PNorm = 162.6986, GNorm = 0.1080, lr_0 = 3.7788e-04
Loss = 8.8305e-03, PNorm = 162.7167, GNorm = 0.3314, lr_0 = 3.7762e-04
Loss = 9.0109e-03, PNorm = 162.7341, GNorm = 0.2243, lr_0 = 3.7736e-04
Loss = 1.1260e-02, PNorm = 162.7547, GNorm = 0.3587, lr_0 = 3.7710e-04
Loss = 9.4543e-03, PNorm = 162.7753, GNorm = 0.1304, lr_0 = 3.7684e-04
Loss = 8.9139e-03, PNorm = 162.7936, GNorm = 0.1081, lr_0 = 3.7659e-04
Loss = 7.7118e-03, PNorm = 162.8155, GNorm = 0.4196, lr_0 = 3.7633e-04
Loss = 1.0517e-02, PNorm = 162.8302, GNorm = 0.1882, lr_0 = 3.7607e-04
Loss = 1.0890e-02, PNorm = 162.8500, GNorm = 0.5281, lr_0 = 3.7581e-04
Loss = 7.1745e-03, PNorm = 162.8703, GNorm = 0.1308, lr_0 = 3.7555e-04
Loss = 8.0130e-03, PNorm = 162.8878, GNorm = 0.1803, lr_0 = 3.7530e-04
Loss = 6.4380e-03, PNorm = 162.9062, GNorm = 0.1027, lr_0 = 3.7504e-04
Loss = 7.5165e-03, PNorm = 162.9227, GNorm = 0.1964, lr_0 = 3.7478e-04
Loss = 9.6119e-03, PNorm = 162.9425, GNorm = 0.2731, lr_0 = 3.7453e-04
Loss = 7.8617e-03, PNorm = 162.9637, GNorm = 0.1042, lr_0 = 3.7427e-04
Loss = 7.9120e-03, PNorm = 162.9888, GNorm = 0.1655, lr_0 = 3.7401e-04
Loss = 9.7803e-03, PNorm = 163.0112, GNorm = 0.4517, lr_0 = 3.7376e-04
Loss = 9.8931e-03, PNorm = 163.0304, GNorm = 0.2397, lr_0 = 3.7350e-04
Loss = 8.1633e-03, PNorm = 163.0509, GNorm = 0.2667, lr_0 = 3.7325e-04
Loss = 8.4810e-03, PNorm = 163.0689, GNorm = 0.1332, lr_0 = 3.7299e-04
Loss = 1.0091e-02, PNorm = 163.0846, GNorm = 0.1620, lr_0 = 3.7273e-04
Validation mae = 0.280665
Epoch 14
Loss = 8.5158e-03, PNorm = 163.0980, GNorm = 0.2755, lr_0 = 3.7248e-04
Loss = 7.9490e-03, PNorm = 163.1147, GNorm = 0.4313, lr_0 = 3.7222e-04
Loss = 8.3419e-03, PNorm = 163.1281, GNorm = 0.1772, lr_0 = 3.7197e-04
Loss = 7.4645e-03, PNorm = 163.1408, GNorm = 0.4693, lr_0 = 3.7171e-04
Loss = 7.4325e-03, PNorm = 163.1520, GNorm = 0.1976, lr_0 = 3.7146e-04
Loss = 8.2032e-03, PNorm = 163.1657, GNorm = 0.2919, lr_0 = 3.7120e-04
Loss = 6.6491e-03, PNorm = 163.1801, GNorm = 0.1181, lr_0 = 3.7095e-04
Loss = 8.4142e-03, PNorm = 163.1978, GNorm = 0.3948, lr_0 = 3.7070e-04
Loss = 6.3428e-03, PNorm = 163.2146, GNorm = 0.2897, lr_0 = 3.7044e-04
Loss = 6.3001e-03, PNorm = 163.2296, GNorm = 0.1006, lr_0 = 3.7019e-04
Loss = 7.6587e-03, PNorm = 163.2440, GNorm = 0.2371, lr_0 = 3.6993e-04
Loss = 8.9831e-03, PNorm = 163.2544, GNorm = 0.2266, lr_0 = 3.6968e-04
Loss = 7.1870e-03, PNorm = 163.2629, GNorm = 0.4482, lr_0 = 3.6943e-04
Loss = 7.8520e-03, PNorm = 163.2826, GNorm = 0.1933, lr_0 = 3.6917e-04
Loss = 7.0287e-03, PNorm = 163.2996, GNorm = 0.1643, lr_0 = 3.6892e-04
Loss = 7.5839e-03, PNorm = 163.3140, GNorm = 0.4288, lr_0 = 3.6867e-04
Loss = 6.2482e-03, PNorm = 163.3261, GNorm = 0.2472, lr_0 = 3.6842e-04
Loss = 8.3847e-03, PNorm = 163.3344, GNorm = 0.4135, lr_0 = 3.6816e-04
Loss = 5.0808e-03, PNorm = 163.3428, GNorm = 0.1486, lr_0 = 3.6791e-04
Loss = 8.0194e-03, PNorm = 163.3546, GNorm = 0.4719, lr_0 = 3.6766e-04
Loss = 6.4170e-03, PNorm = 163.3709, GNorm = 0.4025, lr_0 = 3.6741e-04
Loss = 6.4236e-03, PNorm = 163.3851, GNorm = 0.2731, lr_0 = 3.6716e-04
Loss = 6.3071e-03, PNorm = 163.3999, GNorm = 0.3229, lr_0 = 3.6690e-04
Loss = 5.5692e-03, PNorm = 163.4127, GNorm = 0.2114, lr_0 = 3.6665e-04
Loss = 8.5025e-03, PNorm = 163.4261, GNorm = 0.5484, lr_0 = 3.6640e-04
Loss = 8.5558e-03, PNorm = 163.4432, GNorm = 0.3940, lr_0 = 3.6615e-04
Loss = 6.8728e-03, PNorm = 163.4583, GNorm = 0.1778, lr_0 = 3.6590e-04
Loss = 6.4347e-03, PNorm = 163.4759, GNorm = 0.2042, lr_0 = 3.6565e-04
Loss = 6.7433e-03, PNorm = 163.4885, GNorm = 0.2422, lr_0 = 3.6540e-04
Loss = 8.5327e-03, PNorm = 163.5025, GNorm = 0.6461, lr_0 = 3.6515e-04
Loss = 6.4026e-03, PNorm = 163.5159, GNorm = 0.2920, lr_0 = 3.6490e-04
Loss = 7.0255e-03, PNorm = 163.5319, GNorm = 0.1880, lr_0 = 3.6465e-04
Loss = 7.0719e-03, PNorm = 163.5457, GNorm = 0.2824, lr_0 = 3.6440e-04
Loss = 5.9634e-03, PNorm = 163.5572, GNorm = 0.1847, lr_0 = 3.6415e-04
Loss = 9.0969e-03, PNorm = 163.5713, GNorm = 0.4576, lr_0 = 3.6390e-04
Loss = 9.8921e-03, PNorm = 163.5880, GNorm = 0.4232, lr_0 = 3.6365e-04
Loss = 6.6989e-03, PNorm = 163.6080, GNorm = 0.1133, lr_0 = 3.6340e-04
Loss = 6.0210e-03, PNorm = 163.6235, GNorm = 0.4908, lr_0 = 3.6315e-04
Loss = 6.2716e-03, PNorm = 163.6372, GNorm = 0.4266, lr_0 = 3.6290e-04
Loss = 6.4071e-03, PNorm = 163.6506, GNorm = 0.1878, lr_0 = 3.6266e-04
Loss = 6.6221e-03, PNorm = 163.6658, GNorm = 0.2302, lr_0 = 3.6241e-04
Loss = 6.3062e-03, PNorm = 163.6834, GNorm = 0.1844, lr_0 = 3.6216e-04
Loss = 6.4774e-03, PNorm = 163.6972, GNorm = 0.2092, lr_0 = 3.6191e-04
Loss = 5.3779e-03, PNorm = 163.7122, GNorm = 0.1329, lr_0 = 3.6166e-04
Loss = 7.3866e-03, PNorm = 163.7263, GNorm = 0.1843, lr_0 = 3.6141e-04
Loss = 6.1851e-03, PNorm = 163.7403, GNorm = 0.1145, lr_0 = 3.6117e-04
Loss = 7.5246e-03, PNorm = 163.7547, GNorm = 0.3871, lr_0 = 3.6092e-04
Loss = 1.0283e-02, PNorm = 163.7723, GNorm = 0.4856, lr_0 = 3.6067e-04
Loss = 6.9747e-03, PNorm = 163.7880, GNorm = 0.7628, lr_0 = 3.6043e-04
Loss = 7.1261e-03, PNorm = 163.8050, GNorm = 0.3999, lr_0 = 3.6018e-04
Loss = 6.2959e-03, PNorm = 163.8207, GNorm = 0.1077, lr_0 = 3.5993e-04
Loss = 7.5488e-03, PNorm = 163.8364, GNorm = 0.2153, lr_0 = 3.5969e-04
Loss = 6.6837e-03, PNorm = 163.8486, GNorm = 0.1604, lr_0 = 3.5944e-04
Loss = 6.1495e-03, PNorm = 163.8646, GNorm = 0.2436, lr_0 = 3.5919e-04
Loss = 8.0519e-03, PNorm = 163.8792, GNorm = 0.2331, lr_0 = 3.5895e-04
Loss = 8.5964e-03, PNorm = 163.8937, GNorm = 0.3294, lr_0 = 3.5870e-04
Loss = 7.0928e-03, PNorm = 163.9118, GNorm = 0.2888, lr_0 = 3.5845e-04
Loss = 6.3901e-03, PNorm = 163.9286, GNorm = 0.4834, lr_0 = 3.5821e-04
Loss = 5.2415e-03, PNorm = 163.9441, GNorm = 0.3034, lr_0 = 3.5796e-04
Loss = 6.0273e-03, PNorm = 163.9600, GNorm = 0.1046, lr_0 = 3.5772e-04
Loss = 6.6586e-03, PNorm = 163.9747, GNorm = 0.1674, lr_0 = 3.5747e-04
Loss = 6.9356e-03, PNorm = 163.9914, GNorm = 0.2285, lr_0 = 3.5723e-04
Loss = 8.8652e-03, PNorm = 164.0070, GNorm = 0.1531, lr_0 = 3.5698e-04
Loss = 6.7358e-03, PNorm = 164.0222, GNorm = 0.2430, lr_0 = 3.5674e-04
Loss = 6.3369e-03, PNorm = 164.0354, GNorm = 0.0778, lr_0 = 3.5650e-04
Loss = 6.7314e-03, PNorm = 164.0524, GNorm = 0.2112, lr_0 = 3.5625e-04
Loss = 6.0744e-03, PNorm = 164.0673, GNorm = 0.2469, lr_0 = 3.5601e-04
Loss = 6.1804e-03, PNorm = 164.0836, GNorm = 0.1740, lr_0 = 3.5576e-04
Loss = 5.6567e-03, PNorm = 164.1019, GNorm = 0.1744, lr_0 = 3.5552e-04
Loss = 1.0204e-02, PNorm = 164.1190, GNorm = 0.2355, lr_0 = 3.5528e-04
Loss = 8.0324e-03, PNorm = 164.1332, GNorm = 0.3134, lr_0 = 3.5503e-04
Loss = 5.6222e-03, PNorm = 164.1476, GNorm = 0.1793, lr_0 = 3.5479e-04
Loss = 5.8156e-03, PNorm = 164.1606, GNorm = 0.1142, lr_0 = 3.5455e-04
Loss = 5.8422e-03, PNorm = 164.1722, GNorm = 0.3615, lr_0 = 3.5430e-04
Loss = 6.8501e-03, PNorm = 164.1867, GNorm = 0.2281, lr_0 = 3.5406e-04
Loss = 1.0089e-02, PNorm = 164.2001, GNorm = 0.0996, lr_0 = 3.5382e-04
Loss = 7.0333e-03, PNorm = 164.2133, GNorm = 0.2276, lr_0 = 3.5358e-04
Loss = 7.0733e-03, PNorm = 164.2319, GNorm = 0.3147, lr_0 = 3.5333e-04
Loss = 6.2708e-03, PNorm = 164.2480, GNorm = 0.1993, lr_0 = 3.5309e-04
Loss = 6.4006e-03, PNorm = 164.2630, GNorm = 0.1707, lr_0 = 3.5285e-04
Loss = 7.1781e-03, PNorm = 164.2794, GNorm = 0.1582, lr_0 = 3.5261e-04
Loss = 6.8811e-03, PNorm = 164.2921, GNorm = 0.1718, lr_0 = 3.5237e-04
Loss = 6.9406e-03, PNorm = 164.3056, GNorm = 0.2023, lr_0 = 3.5212e-04
Loss = 6.4626e-03, PNorm = 164.3205, GNorm = 0.2801, lr_0 = 3.5188e-04
Loss = 6.5907e-03, PNorm = 164.3368, GNorm = 0.2692, lr_0 = 3.5164e-04
Loss = 8.3745e-03, PNorm = 164.3544, GNorm = 0.1101, lr_0 = 3.5140e-04
Loss = 6.3558e-03, PNorm = 164.3690, GNorm = 0.1789, lr_0 = 3.5116e-04
Loss = 7.6484e-03, PNorm = 164.3828, GNorm = 0.1768, lr_0 = 3.5092e-04
Loss = 6.2929e-03, PNorm = 164.3972, GNorm = 0.2092, lr_0 = 3.5068e-04
Loss = 8.6642e-03, PNorm = 164.4143, GNorm = 0.1296, lr_0 = 3.5044e-04
Loss = 8.2770e-03, PNorm = 164.4306, GNorm = 0.4313, lr_0 = 3.5020e-04
Loss = 1.1776e-02, PNorm = 164.4496, GNorm = 0.3262, lr_0 = 3.4996e-04
Loss = 8.2628e-03, PNorm = 164.4688, GNorm = 0.2692, lr_0 = 3.4972e-04
Loss = 6.6742e-03, PNorm = 164.4854, GNorm = 0.2431, lr_0 = 3.4948e-04
Loss = 6.5917e-03, PNorm = 164.5002, GNorm = 0.1440, lr_0 = 3.4924e-04
Loss = 9.5517e-03, PNorm = 164.5139, GNorm = 0.2215, lr_0 = 3.4900e-04
Loss = 7.1746e-03, PNorm = 164.5309, GNorm = 0.2326, lr_0 = 3.4876e-04
Loss = 7.5676e-03, PNorm = 164.5466, GNorm = 0.3849, lr_0 = 3.4852e-04
Loss = 7.0259e-03, PNorm = 164.5590, GNorm = 0.2666, lr_0 = 3.4828e-04
Loss = 7.9424e-03, PNorm = 164.5747, GNorm = 0.1743, lr_0 = 3.4805e-04
Loss = 8.9413e-03, PNorm = 164.5911, GNorm = 0.2103, lr_0 = 3.4781e-04
Loss = 8.2909e-03, PNorm = 164.6115, GNorm = 0.3050, lr_0 = 3.4757e-04
Loss = 8.7878e-03, PNorm = 164.6270, GNorm = 0.5282, lr_0 = 3.4733e-04
Loss = 6.7622e-03, PNorm = 164.6457, GNorm = 0.2082, lr_0 = 3.4709e-04
Loss = 9.6053e-03, PNorm = 164.6652, GNorm = 0.5338, lr_0 = 3.4686e-04
Loss = 8.1865e-03, PNorm = 164.6815, GNorm = 0.3105, lr_0 = 3.4662e-04
Loss = 5.8651e-03, PNorm = 164.6951, GNorm = 0.5485, lr_0 = 3.4638e-04
Loss = 7.0322e-03, PNorm = 164.7111, GNorm = 0.2108, lr_0 = 3.4614e-04
Loss = 5.9457e-03, PNorm = 164.7269, GNorm = 0.1083, lr_0 = 3.4591e-04
Loss = 6.4317e-03, PNorm = 164.7414, GNorm = 0.2589, lr_0 = 3.4567e-04
Loss = 6.3891e-03, PNorm = 164.7586, GNorm = 0.3525, lr_0 = 3.4543e-04
Loss = 6.3199e-03, PNorm = 164.7767, GNorm = 0.2049, lr_0 = 3.4520e-04
Loss = 5.9605e-03, PNorm = 164.7945, GNorm = 0.2070, lr_0 = 3.4496e-04
Loss = 6.2912e-03, PNorm = 164.8075, GNorm = 0.1483, lr_0 = 3.4472e-04
Loss = 5.7496e-03, PNorm = 164.8225, GNorm = 0.1167, lr_0 = 3.4449e-04
Loss = 1.0181e-02, PNorm = 164.8333, GNorm = 0.1932, lr_0 = 3.4425e-04
Loss = 8.4161e-03, PNorm = 164.8525, GNorm = 0.0736, lr_0 = 3.4402e-04
Loss = 7.0810e-03, PNorm = 164.8706, GNorm = 0.2284, lr_0 = 3.4378e-04
Loss = 7.4135e-03, PNorm = 164.8848, GNorm = 0.1163, lr_0 = 3.4354e-04
Loss = 6.5865e-03, PNorm = 164.8986, GNorm = 0.3163, lr_0 = 3.4331e-04
Validation mae = 0.279867
Epoch 15
Loss = 1.0332e-02, PNorm = 164.9101, GNorm = 0.3291, lr_0 = 3.4307e-04
Loss = 6.2498e-03, PNorm = 164.9222, GNorm = 0.1444, lr_0 = 3.4284e-04
Loss = 6.1131e-03, PNorm = 164.9337, GNorm = 0.2188, lr_0 = 3.4260e-04
Loss = 5.5470e-03, PNorm = 164.9459, GNorm = 0.2598, lr_0 = 3.4237e-04
Loss = 5.2605e-03, PNorm = 164.9556, GNorm = 0.3565, lr_0 = 3.4213e-04
Loss = 5.8610e-03, PNorm = 164.9672, GNorm = 0.2247, lr_0 = 3.4190e-04
Loss = 5.7150e-03, PNorm = 164.9795, GNorm = 0.3445, lr_0 = 3.4167e-04
Loss = 5.9739e-03, PNorm = 164.9905, GNorm = 0.1576, lr_0 = 3.4143e-04
Loss = 6.0011e-03, PNorm = 165.0001, GNorm = 0.0932, lr_0 = 3.4120e-04
Loss = 6.0508e-03, PNorm = 165.0083, GNorm = 0.1466, lr_0 = 3.4096e-04
Loss = 6.3867e-03, PNorm = 165.0163, GNorm = 0.2554, lr_0 = 3.4073e-04
Loss = 7.2551e-03, PNorm = 165.0292, GNorm = 0.4964, lr_0 = 3.4050e-04
Loss = 5.9314e-03, PNorm = 165.0426, GNorm = 0.0850, lr_0 = 3.4026e-04
Loss = 6.2786e-03, PNorm = 165.0584, GNorm = 0.2065, lr_0 = 3.4003e-04
Loss = 6.1961e-03, PNorm = 165.0712, GNorm = 0.3853, lr_0 = 3.3980e-04
Loss = 6.9645e-03, PNorm = 165.0827, GNorm = 0.2359, lr_0 = 3.3956e-04
Loss = 5.6185e-03, PNorm = 165.0933, GNorm = 0.2224, lr_0 = 3.3933e-04
Loss = 7.0955e-03, PNorm = 165.1034, GNorm = 0.1949, lr_0 = 3.3910e-04
Loss = 5.3348e-03, PNorm = 165.1154, GNorm = 0.3014, lr_0 = 3.3887e-04
Loss = 8.0854e-03, PNorm = 165.1252, GNorm = 0.0815, lr_0 = 3.3864e-04
Loss = 5.5247e-03, PNorm = 165.1368, GNorm = 0.6283, lr_0 = 3.3840e-04
Loss = 5.5671e-03, PNorm = 165.1519, GNorm = 0.1172, lr_0 = 3.3817e-04
Loss = 4.7638e-03, PNorm = 165.1641, GNorm = 0.3539, lr_0 = 3.3794e-04
Loss = 6.2811e-03, PNorm = 165.1752, GNorm = 0.1908, lr_0 = 3.3771e-04
Loss = 4.9646e-03, PNorm = 165.1825, GNorm = 0.1282, lr_0 = 3.3748e-04
Loss = 5.4469e-03, PNorm = 165.1925, GNorm = 0.1139, lr_0 = 3.3725e-04
Loss = 5.8923e-03, PNorm = 165.2045, GNorm = 0.1210, lr_0 = 3.3701e-04
Loss = 5.5116e-03, PNorm = 165.2164, GNorm = 0.1528, lr_0 = 3.3678e-04
Loss = 7.3921e-03, PNorm = 165.2283, GNorm = 0.4733, lr_0 = 3.3655e-04
Loss = 5.9134e-03, PNorm = 165.2397, GNorm = 0.1994, lr_0 = 3.3632e-04
Loss = 5.5826e-03, PNorm = 165.2550, GNorm = 0.1730, lr_0 = 3.3609e-04
Loss = 5.9628e-03, PNorm = 165.2686, GNorm = 0.1394, lr_0 = 3.3586e-04
Loss = 5.9956e-03, PNorm = 165.2804, GNorm = 0.2611, lr_0 = 3.3563e-04
Loss = 7.2212e-03, PNorm = 165.2940, GNorm = 0.0943, lr_0 = 3.3540e-04
Loss = 5.7863e-03, PNorm = 165.3060, GNorm = 0.4019, lr_0 = 3.3517e-04
Loss = 5.8190e-03, PNorm = 165.3188, GNorm = 0.3300, lr_0 = 3.3494e-04
Loss = 6.7535e-03, PNorm = 165.3280, GNorm = 0.4777, lr_0 = 3.3471e-04
Loss = 5.7209e-03, PNorm = 165.3398, GNorm = 0.1350, lr_0 = 3.3448e-04
Loss = 5.1977e-03, PNorm = 165.3546, GNorm = 0.2131, lr_0 = 3.3425e-04
Loss = 5.7248e-03, PNorm = 165.3686, GNorm = 0.1400, lr_0 = 3.3403e-04
Loss = 7.0184e-03, PNorm = 165.3814, GNorm = 0.2950, lr_0 = 3.3380e-04
Loss = 6.9610e-03, PNorm = 165.3928, GNorm = 0.3365, lr_0 = 3.3357e-04
Loss = 5.2088e-03, PNorm = 165.4030, GNorm = 0.1390, lr_0 = 3.3334e-04
Loss = 6.6243e-03, PNorm = 165.4130, GNorm = 0.2144, lr_0 = 3.3311e-04
Loss = 5.8331e-03, PNorm = 165.4234, GNorm = 0.1247, lr_0 = 3.3288e-04
Loss = 5.7349e-03, PNorm = 165.4359, GNorm = 0.2446, lr_0 = 3.3265e-04
Loss = 6.9954e-03, PNorm = 165.4502, GNorm = 0.1516, lr_0 = 3.3243e-04
Loss = 5.3574e-03, PNorm = 165.4623, GNorm = 0.4998, lr_0 = 3.3220e-04
Loss = 7.2935e-03, PNorm = 165.4716, GNorm = 0.1952, lr_0 = 3.3197e-04
Loss = 5.7061e-03, PNorm = 165.4823, GNorm = 0.1255, lr_0 = 3.3174e-04
Loss = 7.3744e-03, PNorm = 165.4924, GNorm = 0.2144, lr_0 = 3.3152e-04
Loss = 4.5823e-03, PNorm = 165.5043, GNorm = 0.3486, lr_0 = 3.3129e-04
Loss = 8.4137e-03, PNorm = 165.5146, GNorm = 0.6783, lr_0 = 3.3106e-04
Loss = 5.2417e-03, PNorm = 165.5266, GNorm = 0.2744, lr_0 = 3.3084e-04
Loss = 5.8066e-03, PNorm = 165.5394, GNorm = 0.1374, lr_0 = 3.3061e-04
Loss = 5.6842e-03, PNorm = 165.5540, GNorm = 0.1279, lr_0 = 3.3038e-04
Loss = 5.5579e-03, PNorm = 165.5667, GNorm = 0.1934, lr_0 = 3.3016e-04
Loss = 5.6762e-03, PNorm = 165.5772, GNorm = 0.1352, lr_0 = 3.2993e-04
Loss = 5.9269e-03, PNorm = 165.5896, GNorm = 0.1665, lr_0 = 3.2970e-04
Loss = 5.7565e-03, PNorm = 165.6009, GNorm = 0.3118, lr_0 = 3.2948e-04
Loss = 5.0505e-03, PNorm = 165.6157, GNorm = 0.1345, lr_0 = 3.2925e-04
Loss = 6.0664e-03, PNorm = 165.6271, GNorm = 0.3790, lr_0 = 3.2903e-04
Loss = 5.6429e-03, PNorm = 165.6375, GNorm = 0.2651, lr_0 = 3.2880e-04
Loss = 4.6795e-03, PNorm = 165.6497, GNorm = 0.1912, lr_0 = 3.2858e-04
Loss = 5.5518e-03, PNorm = 165.6617, GNorm = 0.2640, lr_0 = 3.2835e-04
Loss = 6.1612e-03, PNorm = 165.6768, GNorm = 0.3706, lr_0 = 3.2813e-04
Loss = 5.8265e-03, PNorm = 165.6909, GNorm = 0.1385, lr_0 = 3.2790e-04
Loss = 5.6652e-03, PNorm = 165.7054, GNorm = 0.2577, lr_0 = 3.2768e-04
Loss = 6.6943e-03, PNorm = 165.7205, GNorm = 0.1390, lr_0 = 3.2745e-04
Loss = 5.8460e-03, PNorm = 165.7311, GNorm = 0.1017, lr_0 = 3.2723e-04
Loss = 6.4499e-03, PNorm = 165.7422, GNorm = 0.4976, lr_0 = 3.2700e-04
Loss = 5.2193e-03, PNorm = 165.7529, GNorm = 0.3479, lr_0 = 3.2678e-04
Loss = 7.7091e-03, PNorm = 165.7650, GNorm = 0.1507, lr_0 = 3.2656e-04
Loss = 5.5246e-03, PNorm = 165.7804, GNorm = 0.0805, lr_0 = 3.2633e-04
Loss = 6.0648e-03, PNorm = 165.7954, GNorm = 0.1722, lr_0 = 3.2611e-04
Loss = 7.9594e-03, PNorm = 165.8118, GNorm = 0.2680, lr_0 = 3.2589e-04
Loss = 5.9304e-03, PNorm = 165.8252, GNorm = 0.1712, lr_0 = 3.2566e-04
Loss = 5.6155e-03, PNorm = 165.8358, GNorm = 0.4387, lr_0 = 3.2544e-04
Loss = 8.0197e-03, PNorm = 165.8475, GNorm = 0.1252, lr_0 = 3.2522e-04
Loss = 6.5623e-03, PNorm = 165.8594, GNorm = 0.6040, lr_0 = 3.2499e-04
Loss = 8.2527e-03, PNorm = 165.8706, GNorm = 0.0876, lr_0 = 3.2477e-04
Loss = 6.7359e-03, PNorm = 165.8856, GNorm = 0.3126, lr_0 = 3.2455e-04
Loss = 5.2231e-03, PNorm = 165.8977, GNorm = 0.3486, lr_0 = 3.2433e-04
Loss = 5.3152e-03, PNorm = 165.9101, GNorm = 0.3224, lr_0 = 3.2410e-04
Loss = 5.3806e-03, PNorm = 165.9230, GNorm = 0.1639, lr_0 = 3.2388e-04
Loss = 5.8243e-03, PNorm = 165.9355, GNorm = 0.3943, lr_0 = 3.2366e-04
Loss = 6.9037e-03, PNorm = 165.9462, GNorm = 0.1122, lr_0 = 3.2344e-04
Loss = 6.5486e-03, PNorm = 165.9559, GNorm = 0.3017, lr_0 = 3.2322e-04
Loss = 6.9192e-03, PNorm = 165.9669, GNorm = 0.1714, lr_0 = 3.2300e-04
Loss = 5.0364e-03, PNorm = 165.9830, GNorm = 0.1468, lr_0 = 3.2277e-04
Loss = 8.0218e-03, PNorm = 165.9967, GNorm = 0.4041, lr_0 = 3.2255e-04
Loss = 4.6263e-03, PNorm = 166.0105, GNorm = 0.1809, lr_0 = 3.2233e-04
Loss = 5.2651e-03, PNorm = 166.0235, GNorm = 0.2172, lr_0 = 3.2211e-04
Loss = 6.1930e-03, PNorm = 166.0404, GNorm = 0.2040, lr_0 = 3.2189e-04
Loss = 6.6187e-03, PNorm = 166.0580, GNorm = 0.1745, lr_0 = 3.2167e-04
Loss = 5.3698e-03, PNorm = 166.0710, GNorm = 0.3462, lr_0 = 3.2145e-04
Loss = 6.4799e-03, PNorm = 166.0826, GNorm = 0.1721, lr_0 = 3.2123e-04
Loss = 8.3941e-03, PNorm = 166.0949, GNorm = 0.2762, lr_0 = 3.2101e-04
Loss = 7.6676e-03, PNorm = 166.1093, GNorm = 0.2677, lr_0 = 3.2079e-04
Loss = 8.1138e-03, PNorm = 166.1243, GNorm = 0.2942, lr_0 = 3.2057e-04
Loss = 4.8888e-03, PNorm = 166.1375, GNorm = 0.3520, lr_0 = 3.2035e-04
Loss = 5.6568e-03, PNorm = 166.1522, GNorm = 0.3132, lr_0 = 3.2013e-04
Loss = 4.9284e-03, PNorm = 166.1658, GNorm = 0.0737, lr_0 = 3.1991e-04
Loss = 6.0295e-03, PNorm = 166.1780, GNorm = 0.1589, lr_0 = 3.1969e-04
Loss = 6.4742e-03, PNorm = 166.1954, GNorm = 0.2375, lr_0 = 3.1947e-04
Loss = 6.3696e-03, PNorm = 166.2114, GNorm = 0.1733, lr_0 = 3.1925e-04
Loss = 1.0459e-02, PNorm = 166.2211, GNorm = 0.3995, lr_0 = 3.1904e-04
Loss = 1.0217e-02, PNorm = 166.2348, GNorm = 0.1443, lr_0 = 3.1882e-04
Loss = 6.9202e-03, PNorm = 166.2501, GNorm = 0.2396, lr_0 = 3.1860e-04
Loss = 5.2453e-03, PNorm = 166.2656, GNorm = 0.2293, lr_0 = 3.1838e-04
Loss = 6.9081e-03, PNorm = 166.2846, GNorm = 0.2759, lr_0 = 3.1816e-04
Loss = 5.9776e-03, PNorm = 166.2969, GNorm = 0.2145, lr_0 = 3.1794e-04
Loss = 4.6788e-03, PNorm = 166.3094, GNorm = 0.1613, lr_0 = 3.1773e-04
Loss = 6.6820e-03, PNorm = 166.3229, GNorm = 0.1303, lr_0 = 3.1751e-04
Loss = 6.1646e-03, PNorm = 166.3378, GNorm = 0.0947, lr_0 = 3.1729e-04
Loss = 4.9683e-03, PNorm = 166.3508, GNorm = 0.3014, lr_0 = 3.1707e-04
Loss = 5.6195e-03, PNorm = 166.3648, GNorm = 0.4546, lr_0 = 3.1686e-04
Loss = 5.0661e-03, PNorm = 166.3744, GNorm = 0.1752, lr_0 = 3.1664e-04
Loss = 6.4147e-03, PNorm = 166.3883, GNorm = 0.4167, lr_0 = 3.1642e-04
Loss = 7.9750e-03, PNorm = 166.4042, GNorm = 0.2678, lr_0 = 3.1621e-04
Validation mae = 0.279031
Epoch 16
Loss = 4.8147e-03, PNorm = 166.4176, GNorm = 0.2842, lr_0 = 3.1599e-04
Loss = 5.5437e-03, PNorm = 166.4287, GNorm = 0.1201, lr_0 = 3.1577e-04
Loss = 6.4682e-03, PNorm = 166.4380, GNorm = 0.3368, lr_0 = 3.1556e-04
Loss = 7.4351e-03, PNorm = 166.4486, GNorm = 0.2385, lr_0 = 3.1534e-04
Loss = 5.3282e-03, PNorm = 166.4582, GNorm = 0.2271, lr_0 = 3.1512e-04
Loss = 5.3312e-03, PNorm = 166.4686, GNorm = 0.3128, lr_0 = 3.1491e-04
Loss = 7.1320e-03, PNorm = 166.4807, GNorm = 0.1569, lr_0 = 3.1469e-04
Loss = 5.2094e-03, PNorm = 166.4916, GNorm = 0.1927, lr_0 = 3.1448e-04
Loss = 4.5925e-03, PNorm = 166.5044, GNorm = 0.1960, lr_0 = 3.1426e-04
Loss = 6.2165e-03, PNorm = 166.5136, GNorm = 0.1343, lr_0 = 3.1405e-04
Loss = 5.2733e-03, PNorm = 166.5229, GNorm = 0.2189, lr_0 = 3.1383e-04
Loss = 5.6213e-03, PNorm = 166.5338, GNorm = 0.4584, lr_0 = 3.1362e-04
Loss = 5.5508e-03, PNorm = 166.5425, GNorm = 0.2015, lr_0 = 3.1340e-04
Loss = 5.0680e-03, PNorm = 166.5535, GNorm = 0.4289, lr_0 = 3.1319e-04
Loss = 7.3420e-03, PNorm = 166.5652, GNorm = 0.3922, lr_0 = 3.1297e-04
Loss = 5.1849e-03, PNorm = 166.5787, GNorm = 0.1510, lr_0 = 3.1276e-04
Loss = 5.5565e-03, PNorm = 166.5876, GNorm = 0.2530, lr_0 = 3.1254e-04
Loss = 7.4987e-03, PNorm = 166.6005, GNorm = 0.4746, lr_0 = 3.1233e-04
Loss = 5.2274e-03, PNorm = 166.6095, GNorm = 0.2524, lr_0 = 3.1212e-04
Loss = 4.8504e-03, PNorm = 166.6203, GNorm = 0.3139, lr_0 = 3.1190e-04
Loss = 6.5062e-03, PNorm = 166.6287, GNorm = 0.3560, lr_0 = 3.1169e-04
Loss = 4.6288e-03, PNorm = 166.6373, GNorm = 0.4337, lr_0 = 3.1147e-04
Loss = 5.0557e-03, PNorm = 166.6474, GNorm = 0.2277, lr_0 = 3.1126e-04
Loss = 5.5318e-03, PNorm = 166.6590, GNorm = 0.3597, lr_0 = 3.1105e-04
Loss = 5.3567e-03, PNorm = 166.6691, GNorm = 0.0953, lr_0 = 3.1083e-04
Loss = 6.0750e-03, PNorm = 166.6757, GNorm = 0.2430, lr_0 = 3.1062e-04
Loss = 4.5921e-03, PNorm = 166.6837, GNorm = 0.2659, lr_0 = 3.1041e-04
Loss = 1.0399e-02, PNorm = 166.6927, GNorm = 0.6160, lr_0 = 3.1020e-04
Loss = 5.6287e-03, PNorm = 166.7033, GNorm = 0.1952, lr_0 = 3.0998e-04
Loss = 4.1909e-03, PNorm = 166.7143, GNorm = 0.2100, lr_0 = 3.0977e-04
Loss = 4.8128e-03, PNorm = 166.7222, GNorm = 0.2080, lr_0 = 3.0956e-04
Loss = 5.0720e-03, PNorm = 166.7342, GNorm = 0.3385, lr_0 = 3.0935e-04
Loss = 5.3098e-03, PNorm = 166.7449, GNorm = 0.2860, lr_0 = 3.0914e-04
Loss = 4.6016e-03, PNorm = 166.7541, GNorm = 0.1842, lr_0 = 3.0892e-04
Loss = 6.2679e-03, PNorm = 166.7685, GNorm = 0.1451, lr_0 = 3.0871e-04
Loss = 5.3667e-03, PNorm = 166.7799, GNorm = 0.1394, lr_0 = 3.0850e-04
Loss = 4.2005e-03, PNorm = 166.7911, GNorm = 0.3537, lr_0 = 3.0829e-04
Loss = 5.4775e-03, PNorm = 166.7994, GNorm = 0.1627, lr_0 = 3.0808e-04
Loss = 5.9779e-03, PNorm = 166.8104, GNorm = 0.4861, lr_0 = 3.0787e-04
Loss = 5.8696e-03, PNorm = 166.8170, GNorm = 0.1891, lr_0 = 3.0766e-04
Loss = 4.3442e-03, PNorm = 166.8245, GNorm = 0.2449, lr_0 = 3.0745e-04
Loss = 5.3081e-03, PNorm = 166.8326, GNorm = 0.2188, lr_0 = 3.0723e-04
Loss = 4.3733e-03, PNorm = 166.8440, GNorm = 0.2508, lr_0 = 3.0702e-04
Loss = 4.1302e-03, PNorm = 166.8557, GNorm = 0.4224, lr_0 = 3.0681e-04
Loss = 5.7016e-03, PNorm = 166.8676, GNorm = 0.2164, lr_0 = 3.0660e-04
Loss = 4.9048e-03, PNorm = 166.8787, GNorm = 0.0554, lr_0 = 3.0639e-04
Loss = 7.6174e-03, PNorm = 166.8916, GNorm = 0.3077, lr_0 = 3.0618e-04
Loss = 4.9016e-03, PNorm = 166.9022, GNorm = 0.2881, lr_0 = 3.0597e-04
Loss = 4.5658e-03, PNorm = 166.9110, GNorm = 0.2827, lr_0 = 3.0576e-04
Loss = 4.3488e-03, PNorm = 166.9177, GNorm = 0.2981, lr_0 = 3.0555e-04
Loss = 4.5279e-03, PNorm = 166.9285, GNorm = 0.1934, lr_0 = 3.0535e-04
Loss = 5.2661e-03, PNorm = 166.9391, GNorm = 0.3561, lr_0 = 3.0514e-04
Loss = 4.6999e-03, PNorm = 166.9511, GNorm = 0.1547, lr_0 = 3.0493e-04
Loss = 4.9184e-03, PNorm = 166.9603, GNorm = 0.1288, lr_0 = 3.0472e-04
Loss = 4.8331e-03, PNorm = 166.9758, GNorm = 0.1127, lr_0 = 3.0451e-04
Loss = 4.9145e-03, PNorm = 166.9883, GNorm = 0.2429, lr_0 = 3.0430e-04
Loss = 5.3648e-03, PNorm = 166.9960, GNorm = 0.0635, lr_0 = 3.0409e-04
Loss = 5.3584e-03, PNorm = 167.0037, GNorm = 0.0772, lr_0 = 3.0388e-04
Loss = 4.1913e-03, PNorm = 167.0124, GNorm = 0.2018, lr_0 = 3.0368e-04
Loss = 5.3573e-03, PNorm = 167.0228, GNorm = 0.2215, lr_0 = 3.0347e-04
Loss = 4.4837e-03, PNorm = 167.0339, GNorm = 0.2510, lr_0 = 3.0326e-04
Loss = 4.2755e-03, PNorm = 167.0437, GNorm = 0.1789, lr_0 = 3.0305e-04
Loss = 6.8963e-03, PNorm = 167.0546, GNorm = 0.3144, lr_0 = 3.0284e-04
Loss = 4.7006e-03, PNorm = 167.0640, GNorm = 0.2691, lr_0 = 3.0264e-04
Loss = 3.9584e-03, PNorm = 167.0725, GNorm = 0.1616, lr_0 = 3.0243e-04
Loss = 4.1157e-03, PNorm = 167.0808, GNorm = 0.3572, lr_0 = 3.0222e-04
Loss = 6.5651e-03, PNorm = 167.0930, GNorm = 0.1054, lr_0 = 3.0202e-04
Loss = 4.3433e-03, PNorm = 167.1025, GNorm = 0.1963, lr_0 = 3.0181e-04
Loss = 5.7161e-03, PNorm = 167.1120, GNorm = 0.1768, lr_0 = 3.0160e-04
Loss = 9.2600e-03, PNorm = 167.1242, GNorm = 0.3441, lr_0 = 3.0140e-04
Loss = 6.1546e-03, PNorm = 167.1364, GNorm = 0.1576, lr_0 = 3.0119e-04
Loss = 4.5667e-03, PNorm = 167.1490, GNorm = 0.1355, lr_0 = 3.0098e-04
Loss = 5.0476e-03, PNorm = 167.1626, GNorm = 0.1878, lr_0 = 3.0078e-04
Loss = 5.9251e-03, PNorm = 167.1765, GNorm = 0.0523, lr_0 = 3.0057e-04
Loss = 5.0388e-03, PNorm = 167.1923, GNorm = 0.4909, lr_0 = 3.0036e-04
Loss = 6.1497e-03, PNorm = 167.2045, GNorm = 0.2290, lr_0 = 3.0016e-04
Loss = 4.7975e-03, PNorm = 167.2143, GNorm = 0.2395, lr_0 = 2.9995e-04
Loss = 4.7010e-03, PNorm = 167.2230, GNorm = 0.1292, lr_0 = 2.9975e-04
Loss = 4.9676e-03, PNorm = 167.2310, GNorm = 0.1362, lr_0 = 2.9954e-04
Loss = 5.8106e-03, PNorm = 167.2393, GNorm = 0.2079, lr_0 = 2.9934e-04
Loss = 6.9112e-03, PNorm = 167.2465, GNorm = 0.4438, lr_0 = 2.9913e-04
Loss = 4.9019e-03, PNorm = 167.2586, GNorm = 0.2266, lr_0 = 2.9893e-04
Loss = 6.4051e-03, PNorm = 167.2721, GNorm = 0.3882, lr_0 = 2.9872e-04
Loss = 5.4712e-03, PNorm = 167.2843, GNorm = 0.1062, lr_0 = 2.9852e-04
Loss = 5.3809e-03, PNorm = 167.2950, GNorm = 0.1316, lr_0 = 2.9831e-04
Loss = 6.9312e-03, PNorm = 167.3084, GNorm = 0.2048, lr_0 = 2.9811e-04
Loss = 4.6525e-03, PNorm = 167.3206, GNorm = 0.1181, lr_0 = 2.9790e-04
Loss = 5.1597e-03, PNorm = 167.3314, GNorm = 0.4827, lr_0 = 2.9770e-04
Loss = 5.0652e-03, PNorm = 167.3412, GNorm = 0.2030, lr_0 = 2.9750e-04
Loss = 4.2801e-03, PNorm = 167.3541, GNorm = 0.3165, lr_0 = 2.9729e-04
Loss = 5.9642e-03, PNorm = 167.3671, GNorm = 0.1050, lr_0 = 2.9709e-04
Loss = 6.6987e-03, PNorm = 167.3798, GNorm = 0.1746, lr_0 = 2.9689e-04
Loss = 4.4344e-03, PNorm = 167.3935, GNorm = 0.2145, lr_0 = 2.9668e-04
Loss = 4.7122e-03, PNorm = 167.4046, GNorm = 0.0825, lr_0 = 2.9648e-04
Loss = 5.0664e-03, PNorm = 167.4154, GNorm = 0.2356, lr_0 = 2.9628e-04
Loss = 4.8476e-03, PNorm = 167.4274, GNorm = 0.2045, lr_0 = 2.9607e-04
Loss = 5.4030e-03, PNorm = 167.4359, GNorm = 0.4439, lr_0 = 2.9587e-04
Loss = 4.0864e-03, PNorm = 167.4458, GNorm = 0.1592, lr_0 = 2.9567e-04
Loss = 7.0387e-03, PNorm = 167.4558, GNorm = 0.1815, lr_0 = 2.9546e-04
Loss = 4.5824e-03, PNorm = 167.4652, GNorm = 0.2817, lr_0 = 2.9526e-04
Loss = 6.5062e-03, PNorm = 167.4775, GNorm = 0.1042, lr_0 = 2.9506e-04
Loss = 4.3456e-03, PNorm = 167.4889, GNorm = 0.1224, lr_0 = 2.9486e-04
Loss = 4.7330e-03, PNorm = 167.5005, GNorm = 0.0948, lr_0 = 2.9466e-04
Loss = 5.1185e-03, PNorm = 167.5124, GNorm = 0.1691, lr_0 = 2.9445e-04
Loss = 4.2140e-03, PNorm = 167.5239, GNorm = 0.0883, lr_0 = 2.9425e-04
Loss = 6.7574e-03, PNorm = 167.5343, GNorm = 0.2039, lr_0 = 2.9405e-04
Loss = 5.0852e-03, PNorm = 167.5482, GNorm = 0.1066, lr_0 = 2.9385e-04
Loss = 5.7219e-03, PNorm = 167.5602, GNorm = 0.4486, lr_0 = 2.9365e-04
Loss = 4.9254e-03, PNorm = 167.5756, GNorm = 0.3260, lr_0 = 2.9345e-04
Loss = 4.3925e-03, PNorm = 167.5856, GNorm = 0.1822, lr_0 = 2.9325e-04
Loss = 5.0988e-03, PNorm = 167.5985, GNorm = 0.5317, lr_0 = 2.9305e-04
Loss = 5.5244e-03, PNorm = 167.6086, GNorm = 0.2980, lr_0 = 2.9284e-04
Loss = 5.0928e-03, PNorm = 167.6211, GNorm = 0.2056, lr_0 = 2.9264e-04
Loss = 5.4275e-03, PNorm = 167.6311, GNorm = 0.2080, lr_0 = 2.9244e-04
Loss = 4.4886e-03, PNorm = 167.6426, GNorm = 0.0941, lr_0 = 2.9224e-04
Loss = 5.4144e-03, PNorm = 167.6564, GNorm = 0.2159, lr_0 = 2.9204e-04
Loss = 4.9754e-03, PNorm = 167.6678, GNorm = 0.2919, lr_0 = 2.9184e-04
Loss = 7.5258e-03, PNorm = 167.6769, GNorm = 0.2951, lr_0 = 2.9164e-04
Loss = 4.9163e-03, PNorm = 167.6885, GNorm = 0.1539, lr_0 = 2.9144e-04
Loss = 5.9692e-03, PNorm = 167.7000, GNorm = 0.2094, lr_0 = 2.9124e-04
Validation mae = 0.278498
Epoch 17
Loss = 4.9995e-03, PNorm = 167.7113, GNorm = 0.1775, lr_0 = 2.9104e-04
Loss = 4.8251e-03, PNorm = 167.7195, GNorm = 0.2055, lr_0 = 2.9084e-04
Loss = 4.5480e-03, PNorm = 167.7269, GNorm = 0.2516, lr_0 = 2.9065e-04
Loss = 3.5271e-03, PNorm = 167.7378, GNorm = 0.1990, lr_0 = 2.9045e-04
Loss = 4.9100e-03, PNorm = 167.7426, GNorm = 0.0962, lr_0 = 2.9025e-04
Loss = 4.3347e-03, PNorm = 167.7518, GNorm = 0.1628, lr_0 = 2.9005e-04
Loss = 5.0683e-03, PNorm = 167.7589, GNorm = 0.1048, lr_0 = 2.8985e-04
Loss = 3.9567e-03, PNorm = 167.7654, GNorm = 0.1601, lr_0 = 2.8965e-04
Loss = 5.3411e-03, PNorm = 167.7735, GNorm = 0.1667, lr_0 = 2.8945e-04
Loss = 4.3537e-03, PNorm = 167.7790, GNorm = 0.4155, lr_0 = 2.8925e-04
Loss = 4.0374e-03, PNorm = 167.7860, GNorm = 0.0937, lr_0 = 2.8906e-04
Loss = 6.2119e-03, PNorm = 167.7961, GNorm = 0.1346, lr_0 = 2.8886e-04
Loss = 4.5179e-03, PNorm = 167.8046, GNorm = 0.0736, lr_0 = 2.8866e-04
Loss = 4.0289e-03, PNorm = 167.8164, GNorm = 0.1864, lr_0 = 2.8846e-04
Loss = 5.0987e-03, PNorm = 167.8258, GNorm = 0.2641, lr_0 = 2.8826e-04
Loss = 5.3120e-03, PNorm = 167.8311, GNorm = 0.2010, lr_0 = 2.8807e-04
Loss = 4.8545e-03, PNorm = 167.8399, GNorm = 0.1461, lr_0 = 2.8787e-04
Loss = 4.4490e-03, PNorm = 167.8503, GNorm = 0.0997, lr_0 = 2.8767e-04
Loss = 6.0591e-03, PNorm = 167.8592, GNorm = 0.1053, lr_0 = 2.8748e-04
Loss = 4.7000e-03, PNorm = 167.8700, GNorm = 0.1521, lr_0 = 2.8728e-04
Loss = 4.3896e-03, PNorm = 167.8789, GNorm = 0.0831, lr_0 = 2.8708e-04
Loss = 6.6489e-03, PNorm = 167.8930, GNorm = 0.2985, lr_0 = 2.8689e-04
Loss = 4.7443e-03, PNorm = 167.9006, GNorm = 0.2515, lr_0 = 2.8669e-04
Loss = 5.2073e-03, PNorm = 167.9071, GNorm = 0.2207, lr_0 = 2.8649e-04
Loss = 6.2645e-03, PNorm = 167.9143, GNorm = 0.3917, lr_0 = 2.8630e-04
Loss = 4.2232e-03, PNorm = 167.9249, GNorm = 0.1593, lr_0 = 2.8610e-04
Loss = 3.8187e-03, PNorm = 167.9319, GNorm = 0.1133, lr_0 = 2.8590e-04
Loss = 4.9152e-03, PNorm = 167.9386, GNorm = 0.2269, lr_0 = 2.8571e-04
Loss = 4.2624e-03, PNorm = 167.9445, GNorm = 0.1193, lr_0 = 2.8551e-04
Loss = 3.5825e-03, PNorm = 167.9534, GNorm = 0.3329, lr_0 = 2.8532e-04
Loss = 4.5102e-03, PNorm = 167.9635, GNorm = 0.3267, lr_0 = 2.8512e-04
Loss = 5.2273e-03, PNorm = 167.9756, GNorm = 0.1029, lr_0 = 2.8493e-04
Loss = 4.0412e-03, PNorm = 167.9854, GNorm = 0.2989, lr_0 = 2.8473e-04
Loss = 3.8735e-03, PNorm = 167.9936, GNorm = 0.2251, lr_0 = 2.8454e-04
Loss = 3.9825e-03, PNorm = 167.9983, GNorm = 0.2914, lr_0 = 2.8434e-04
Loss = 4.3234e-03, PNorm = 168.0051, GNorm = 0.2418, lr_0 = 2.8415e-04
Loss = 4.7397e-03, PNorm = 168.0154, GNorm = 0.1949, lr_0 = 2.8395e-04
Loss = 4.5791e-03, PNorm = 168.0259, GNorm = 0.1958, lr_0 = 2.8376e-04
Loss = 4.0808e-03, PNorm = 168.0349, GNorm = 0.1143, lr_0 = 2.8356e-04
Loss = 3.4081e-03, PNorm = 168.0427, GNorm = 0.1566, lr_0 = 2.8337e-04
Loss = 3.7364e-03, PNorm = 168.0520, GNorm = 0.2809, lr_0 = 2.8317e-04
Loss = 3.5499e-03, PNorm = 168.0595, GNorm = 0.0859, lr_0 = 2.8298e-04
Loss = 4.6783e-03, PNorm = 168.0687, GNorm = 0.1091, lr_0 = 2.8279e-04
Loss = 3.7016e-03, PNorm = 168.0799, GNorm = 0.2769, lr_0 = 2.8259e-04
Loss = 3.4015e-03, PNorm = 168.0892, GNorm = 0.3850, lr_0 = 2.8240e-04
Loss = 3.8593e-03, PNorm = 168.0972, GNorm = 0.1611, lr_0 = 2.8221e-04
Loss = 5.6629e-03, PNorm = 168.1045, GNorm = 0.2792, lr_0 = 2.8201e-04
Loss = 3.7909e-03, PNorm = 168.1104, GNorm = 0.3613, lr_0 = 2.8182e-04
Loss = 6.4111e-03, PNorm = 168.1200, GNorm = 0.3704, lr_0 = 2.8163e-04
Loss = 5.1920e-03, PNorm = 168.1321, GNorm = 0.2946, lr_0 = 2.8143e-04
Loss = 5.4539e-03, PNorm = 168.1419, GNorm = 0.2062, lr_0 = 2.8124e-04
Loss = 6.6014e-03, PNorm = 168.1511, GNorm = 0.3813, lr_0 = 2.8105e-04
Loss = 3.5006e-03, PNorm = 168.1585, GNorm = 0.1074, lr_0 = 2.8085e-04
Loss = 6.1483e-03, PNorm = 168.1672, GNorm = 0.1171, lr_0 = 2.8066e-04
Loss = 5.8965e-03, PNorm = 168.1728, GNorm = 0.3289, lr_0 = 2.8047e-04
Loss = 5.1792e-03, PNorm = 168.1831, GNorm = 0.2417, lr_0 = 2.8028e-04
Loss = 3.9321e-03, PNorm = 168.1905, GNorm = 0.1682, lr_0 = 2.8009e-04
Loss = 4.3656e-03, PNorm = 168.1972, GNorm = 0.2483, lr_0 = 2.7989e-04
Loss = 5.3223e-03, PNorm = 168.2046, GNorm = 0.1151, lr_0 = 2.7970e-04
Loss = 4.6218e-03, PNorm = 168.2126, GNorm = 0.1243, lr_0 = 2.7951e-04
Loss = 3.9269e-03, PNorm = 168.2224, GNorm = 0.1504, lr_0 = 2.7932e-04
Loss = 4.5189e-03, PNorm = 168.2314, GNorm = 0.2506, lr_0 = 2.7913e-04
Loss = 3.7174e-03, PNorm = 168.2404, GNorm = 0.1403, lr_0 = 2.7894e-04
Loss = 4.6587e-03, PNorm = 168.2490, GNorm = 0.0972, lr_0 = 2.7875e-04
Loss = 4.6776e-03, PNorm = 168.2573, GNorm = 0.2825, lr_0 = 2.7855e-04
Loss = 5.4441e-03, PNorm = 168.2658, GNorm = 0.2035, lr_0 = 2.7836e-04
Loss = 5.7150e-03, PNorm = 168.2738, GNorm = 0.1895, lr_0 = 2.7817e-04
Loss = 4.3044e-03, PNorm = 168.2829, GNorm = 0.3311, lr_0 = 2.7798e-04
Loss = 4.6939e-03, PNorm = 168.2926, GNorm = 0.1728, lr_0 = 2.7779e-04
Loss = 4.5786e-03, PNorm = 168.3048, GNorm = 0.1438, lr_0 = 2.7760e-04
Loss = 3.5057e-03, PNorm = 168.3141, GNorm = 0.2211, lr_0 = 2.7741e-04
Loss = 3.0398e-03, PNorm = 168.3215, GNorm = 0.2338, lr_0 = 2.7722e-04
Loss = 5.5347e-03, PNorm = 168.3311, GNorm = 0.2383, lr_0 = 2.7703e-04
Loss = 4.6370e-03, PNorm = 168.3434, GNorm = 0.0621, lr_0 = 2.7684e-04
Loss = 4.6184e-03, PNorm = 168.3550, GNorm = 0.0919, lr_0 = 2.7665e-04
Loss = 6.0150e-03, PNorm = 168.3639, GNorm = 0.2879, lr_0 = 2.7646e-04
Loss = 3.7893e-03, PNorm = 168.3740, GNorm = 0.1545, lr_0 = 2.7627e-04
Loss = 4.5920e-03, PNorm = 168.3813, GNorm = 0.1668, lr_0 = 2.7608e-04
Loss = 3.8298e-03, PNorm = 168.3918, GNorm = 0.1392, lr_0 = 2.7590e-04
Loss = 5.8209e-03, PNorm = 168.4012, GNorm = 0.1498, lr_0 = 2.7571e-04
Loss = 9.0027e-03, PNorm = 168.4136, GNorm = 0.1345, lr_0 = 2.7552e-04
Loss = 5.9370e-03, PNorm = 168.4241, GNorm = 0.4600, lr_0 = 2.7533e-04
Loss = 4.1291e-03, PNorm = 168.4342, GNorm = 0.3274, lr_0 = 2.7514e-04
Loss = 5.6734e-03, PNorm = 168.4445, GNorm = 0.1817, lr_0 = 2.7495e-04
Loss = 3.7065e-03, PNorm = 168.4519, GNorm = 0.1876, lr_0 = 2.7476e-04
Loss = 4.3253e-03, PNorm = 168.4617, GNorm = 0.2907, lr_0 = 2.7457e-04
Loss = 4.2807e-03, PNorm = 168.4683, GNorm = 0.3962, lr_0 = 2.7439e-04
Loss = 4.7967e-03, PNorm = 168.4774, GNorm = 0.4500, lr_0 = 2.7420e-04
Loss = 4.6784e-03, PNorm = 168.4881, GNorm = 0.2184, lr_0 = 2.7401e-04
Loss = 3.9124e-03, PNorm = 168.4999, GNorm = 0.1972, lr_0 = 2.7382e-04
Loss = 4.1545e-03, PNorm = 168.5106, GNorm = 0.0585, lr_0 = 2.7364e-04
Loss = 3.8577e-03, PNorm = 168.5226, GNorm = 0.1450, lr_0 = 2.7345e-04
Loss = 4.3852e-03, PNorm = 168.5312, GNorm = 0.0510, lr_0 = 2.7326e-04
Loss = 4.2561e-03, PNorm = 168.5396, GNorm = 0.2726, lr_0 = 2.7307e-04
Loss = 4.0324e-03, PNorm = 168.5471, GNorm = 0.2131, lr_0 = 2.7289e-04
Loss = 4.4034e-03, PNorm = 168.5565, GNorm = 0.2560, lr_0 = 2.7270e-04
Loss = 4.3936e-03, PNorm = 168.5663, GNorm = 0.2199, lr_0 = 2.7251e-04
Loss = 4.5779e-03, PNorm = 168.5741, GNorm = 0.1817, lr_0 = 2.7233e-04
Loss = 3.7951e-03, PNorm = 168.5819, GNorm = 0.1394, lr_0 = 2.7214e-04
Loss = 5.7124e-03, PNorm = 168.5898, GNorm = 0.1425, lr_0 = 2.7195e-04
Loss = 3.7975e-03, PNorm = 168.5990, GNorm = 0.1130, lr_0 = 2.7177e-04
Loss = 4.6362e-03, PNorm = 168.6082, GNorm = 0.4008, lr_0 = 2.7158e-04
Loss = 3.9620e-03, PNorm = 168.6169, GNorm = 0.0804, lr_0 = 2.7139e-04
Loss = 3.7715e-03, PNorm = 168.6277, GNorm = 0.3615, lr_0 = 2.7121e-04
Loss = 3.4980e-03, PNorm = 168.6379, GNorm = 0.1483, lr_0 = 2.7102e-04
Loss = 3.4931e-03, PNorm = 168.6483, GNorm = 0.1546, lr_0 = 2.7084e-04
Loss = 6.5223e-03, PNorm = 168.6564, GNorm = 0.1618, lr_0 = 2.7065e-04
Loss = 3.0729e-03, PNorm = 168.6637, GNorm = 0.1291, lr_0 = 2.7047e-04
Loss = 6.4382e-03, PNorm = 168.6703, GNorm = 0.1631, lr_0 = 2.7028e-04
Loss = 3.4292e-03, PNorm = 168.6794, GNorm = 0.2447, lr_0 = 2.7010e-04
Loss = 4.3495e-03, PNorm = 168.6861, GNorm = 0.2044, lr_0 = 2.6991e-04
Loss = 5.8060e-03, PNorm = 168.6961, GNorm = 0.2901, lr_0 = 2.6973e-04
Loss = 4.4090e-03, PNorm = 168.7032, GNorm = 0.1404, lr_0 = 2.6954e-04
Loss = 4.6695e-03, PNorm = 168.7110, GNorm = 0.1153, lr_0 = 2.6936e-04
Loss = 4.9655e-03, PNorm = 168.7212, GNorm = 0.2598, lr_0 = 2.6917e-04
Loss = 5.5564e-03, PNorm = 168.7324, GNorm = 0.0859, lr_0 = 2.6899e-04
Loss = 5.3385e-03, PNorm = 168.7442, GNorm = 0.2210, lr_0 = 2.6880e-04
Loss = 4.2755e-03, PNorm = 168.7535, GNorm = 0.1351, lr_0 = 2.6862e-04
Loss = 4.5633e-03, PNorm = 168.7630, GNorm = 0.1840, lr_0 = 2.6844e-04
Loss = 5.2293e-03, PNorm = 168.7698, GNorm = 0.1679, lr_0 = 2.6825e-04
Validation mae = 0.279415
Epoch 18
Loss = 3.6108e-03, PNorm = 168.7820, GNorm = 0.2551, lr_0 = 2.6807e-04
Loss = 4.4652e-03, PNorm = 168.7900, GNorm = 0.1875, lr_0 = 2.6788e-04
Loss = 3.7270e-03, PNorm = 168.7978, GNorm = 0.0790, lr_0 = 2.6770e-04
Loss = 3.6231e-03, PNorm = 168.8037, GNorm = 0.1418, lr_0 = 2.6752e-04
Loss = 3.4758e-03, PNorm = 168.8096, GNorm = 0.1033, lr_0 = 2.6733e-04
Loss = 4.1649e-03, PNorm = 168.8153, GNorm = 0.1330, lr_0 = 2.6715e-04
Loss = 4.9981e-03, PNorm = 168.8207, GNorm = 0.1089, lr_0 = 2.6697e-04
Loss = 3.4271e-03, PNorm = 168.8268, GNorm = 0.2029, lr_0 = 2.6678e-04
Loss = 3.2579e-03, PNorm = 168.8325, GNorm = 0.1403, lr_0 = 2.6660e-04
Loss = 3.4353e-03, PNorm = 168.8391, GNorm = 0.0670, lr_0 = 2.6642e-04
Loss = 3.7573e-03, PNorm = 168.8498, GNorm = 0.2757, lr_0 = 2.6624e-04
Loss = 4.2666e-03, PNorm = 168.8583, GNorm = 0.1889, lr_0 = 2.6605e-04
Loss = 4.1417e-03, PNorm = 168.8655, GNorm = 0.1335, lr_0 = 2.6587e-04
Loss = 5.1748e-03, PNorm = 168.8708, GNorm = 0.2511, lr_0 = 2.6569e-04
Loss = 3.3545e-03, PNorm = 168.8781, GNorm = 0.1545, lr_0 = 2.6551e-04
Loss = 3.8374e-03, PNorm = 168.8852, GNorm = 0.3358, lr_0 = 2.6533e-04
Loss = 4.3939e-03, PNorm = 168.8901, GNorm = 0.0493, lr_0 = 2.6514e-04
Loss = 4.0341e-03, PNorm = 168.8971, GNorm = 0.1764, lr_0 = 2.6496e-04
Loss = 4.6580e-03, PNorm = 168.9049, GNorm = 0.3003, lr_0 = 2.6478e-04
Loss = 3.0618e-03, PNorm = 168.9125, GNorm = 0.1229, lr_0 = 2.6460e-04
Loss = 4.5641e-03, PNorm = 168.9193, GNorm = 0.1152, lr_0 = 2.6442e-04
Loss = 3.3449e-03, PNorm = 168.9262, GNorm = 0.1538, lr_0 = 2.6424e-04
Loss = 3.9165e-03, PNorm = 168.9332, GNorm = 0.2221, lr_0 = 2.6406e-04
Loss = 4.7050e-03, PNorm = 168.9376, GNorm = 0.2667, lr_0 = 2.6388e-04
Loss = 4.3729e-03, PNorm = 168.9434, GNorm = 0.0609, lr_0 = 2.6369e-04
Loss = 3.0389e-03, PNorm = 168.9492, GNorm = 0.1453, lr_0 = 2.6351e-04
Loss = 3.3411e-03, PNorm = 168.9569, GNorm = 0.1391, lr_0 = 2.6333e-04
Loss = 3.7391e-03, PNorm = 168.9632, GNorm = 0.1129, lr_0 = 2.6315e-04
Loss = 3.9600e-03, PNorm = 168.9696, GNorm = 0.2055, lr_0 = 2.6297e-04
Loss = 4.1622e-03, PNorm = 168.9749, GNorm = 0.2345, lr_0 = 2.6279e-04
Loss = 3.7175e-03, PNorm = 168.9826, GNorm = 0.1459, lr_0 = 2.6261e-04
Loss = 3.9687e-03, PNorm = 168.9907, GNorm = 0.1549, lr_0 = 2.6243e-04
Loss = 2.9518e-03, PNorm = 168.9981, GNorm = 0.1375, lr_0 = 2.6225e-04
Loss = 3.5807e-03, PNorm = 169.0059, GNorm = 0.1206, lr_0 = 2.6207e-04
Loss = 3.5602e-03, PNorm = 169.0111, GNorm = 0.1118, lr_0 = 2.6189e-04
Loss = 3.1743e-03, PNorm = 169.0164, GNorm = 0.2434, lr_0 = 2.6171e-04
Loss = 4.6617e-03, PNorm = 169.0242, GNorm = 0.4780, lr_0 = 2.6153e-04
Loss = 4.1820e-03, PNorm = 169.0302, GNorm = 0.3832, lr_0 = 2.6136e-04
Loss = 3.7808e-03, PNorm = 169.0389, GNorm = 0.0687, lr_0 = 2.6118e-04
Loss = 3.8218e-03, PNorm = 169.0461, GNorm = 0.1410, lr_0 = 2.6100e-04
Loss = 5.1768e-03, PNorm = 169.0547, GNorm = 0.1213, lr_0 = 2.6082e-04
Loss = 4.8470e-03, PNorm = 169.0623, GNorm = 0.1749, lr_0 = 2.6064e-04
Loss = 5.5473e-03, PNorm = 169.0686, GNorm = 0.0909, lr_0 = 2.6046e-04
Loss = 3.0273e-03, PNorm = 169.0754, GNorm = 0.1017, lr_0 = 2.6028e-04
Loss = 7.0266e-03, PNorm = 169.0841, GNorm = 0.2422, lr_0 = 2.6011e-04
Loss = 4.4114e-03, PNorm = 169.0926, GNorm = 0.1736, lr_0 = 2.5993e-04
Loss = 3.4526e-03, PNorm = 169.1014, GNorm = 0.1833, lr_0 = 2.5975e-04
Loss = 4.1812e-03, PNorm = 169.1097, GNorm = 0.3120, lr_0 = 2.5957e-04
Loss = 3.1271e-03, PNorm = 169.1174, GNorm = 0.2247, lr_0 = 2.5939e-04
Loss = 5.1186e-03, PNorm = 169.1267, GNorm = 0.3594, lr_0 = 2.5922e-04
Loss = 4.0950e-03, PNorm = 169.1358, GNorm = 0.2649, lr_0 = 2.5904e-04
Loss = 5.2303e-03, PNorm = 169.1446, GNorm = 0.1928, lr_0 = 2.5886e-04
Loss = 3.9099e-03, PNorm = 169.1527, GNorm = 0.0855, lr_0 = 2.5868e-04
Loss = 6.5939e-03, PNorm = 169.1601, GNorm = 0.2974, lr_0 = 2.5851e-04
Loss = 3.4110e-03, PNorm = 169.1680, GNorm = 0.1956, lr_0 = 2.5833e-04
Loss = 2.8293e-03, PNorm = 169.1795, GNorm = 0.0602, lr_0 = 2.5815e-04
Loss = 4.5259e-03, PNorm = 169.1870, GNorm = 0.1454, lr_0 = 2.5797e-04
Loss = 3.8195e-03, PNorm = 169.1939, GNorm = 0.1653, lr_0 = 2.5780e-04
Loss = 3.4180e-03, PNorm = 169.2013, GNorm = 0.1190, lr_0 = 2.5762e-04
Loss = 3.7825e-03, PNorm = 169.2088, GNorm = 0.1899, lr_0 = 2.5745e-04
Loss = 3.6644e-03, PNorm = 169.2147, GNorm = 0.1983, lr_0 = 2.5727e-04
Loss = 2.9621e-03, PNorm = 169.2228, GNorm = 0.2079, lr_0 = 2.5709e-04
Loss = 6.7503e-03, PNorm = 169.2300, GNorm = 0.3211, lr_0 = 2.5692e-04
Loss = 3.1525e-03, PNorm = 169.2367, GNorm = 0.2712, lr_0 = 2.5674e-04
Loss = 4.3905e-03, PNorm = 169.2449, GNorm = 0.2096, lr_0 = 2.5656e-04
Loss = 3.4015e-03, PNorm = 169.2521, GNorm = 0.5733, lr_0 = 2.5639e-04
Loss = 3.3295e-03, PNorm = 169.2594, GNorm = 0.1166, lr_0 = 2.5621e-04
Loss = 4.9471e-03, PNorm = 169.2697, GNorm = 0.0710, lr_0 = 2.5604e-04
Loss = 5.1401e-03, PNorm = 169.2786, GNorm = 0.2599, lr_0 = 2.5586e-04
Loss = 4.3609e-03, PNorm = 169.2866, GNorm = 0.1655, lr_0 = 2.5569e-04
Loss = 3.2701e-03, PNorm = 169.2931, GNorm = 0.1406, lr_0 = 2.5551e-04
Loss = 4.5308e-03, PNorm = 169.3014, GNorm = 0.1541, lr_0 = 2.5534e-04
Loss = 3.9302e-03, PNorm = 169.3081, GNorm = 0.2723, lr_0 = 2.5516e-04
Loss = 4.2590e-03, PNorm = 169.3136, GNorm = 0.1804, lr_0 = 2.5499e-04
Loss = 3.2085e-03, PNorm = 169.3189, GNorm = 0.1914, lr_0 = 2.5481e-04
Loss = 3.5108e-03, PNorm = 169.3241, GNorm = 0.1521, lr_0 = 2.5464e-04
Loss = 4.9059e-03, PNorm = 169.3321, GNorm = 0.0787, lr_0 = 2.5446e-04
Loss = 3.4450e-03, PNorm = 169.3400, GNorm = 0.2084, lr_0 = 2.5429e-04
Loss = 3.1541e-03, PNorm = 169.3488, GNorm = 0.0862, lr_0 = 2.5411e-04
Loss = 2.9237e-03, PNorm = 169.3580, GNorm = 0.1158, lr_0 = 2.5394e-04
Loss = 3.9688e-03, PNorm = 169.3652, GNorm = 0.2600, lr_0 = 2.5377e-04
Loss = 2.9414e-03, PNorm = 169.3718, GNorm = 0.0717, lr_0 = 2.5359e-04
Loss = 3.1989e-03, PNorm = 169.3771, GNorm = 0.1888, lr_0 = 2.5342e-04
Loss = 3.6314e-03, PNorm = 169.3844, GNorm = 0.2248, lr_0 = 2.5325e-04
Loss = 3.4188e-03, PNorm = 169.3912, GNorm = 0.2452, lr_0 = 2.5307e-04
Loss = 3.2269e-03, PNorm = 169.4013, GNorm = 0.1692, lr_0 = 2.5290e-04
Loss = 3.3563e-03, PNorm = 169.4115, GNorm = 0.1652, lr_0 = 2.5273e-04
Loss = 2.8636e-03, PNorm = 169.4188, GNorm = 0.0961, lr_0 = 2.5255e-04
Loss = 3.0918e-03, PNorm = 169.4232, GNorm = 0.1007, lr_0 = 2.5238e-04
Loss = 3.2970e-03, PNorm = 169.4306, GNorm = 0.2302, lr_0 = 2.5221e-04
Loss = 3.0524e-03, PNorm = 169.4350, GNorm = 0.1387, lr_0 = 2.5203e-04
Loss = 3.7613e-03, PNorm = 169.4403, GNorm = 0.1854, lr_0 = 2.5186e-04
Loss = 5.2078e-03, PNorm = 169.4485, GNorm = 0.1967, lr_0 = 2.5169e-04
Loss = 5.4571e-03, PNorm = 169.4572, GNorm = 0.1472, lr_0 = 2.5152e-04
Loss = 3.9942e-03, PNorm = 169.4654, GNorm = 0.0563, lr_0 = 2.5134e-04
Loss = 6.5608e-03, PNorm = 169.4735, GNorm = 0.3505, lr_0 = 2.5117e-04
Loss = 2.8782e-03, PNorm = 169.4804, GNorm = 0.1743, lr_0 = 2.5100e-04
Loss = 3.6480e-03, PNorm = 169.4884, GNorm = 0.0777, lr_0 = 2.5083e-04
Loss = 3.9773e-03, PNorm = 169.4970, GNorm = 0.2111, lr_0 = 2.5066e-04
Loss = 4.2847e-03, PNorm = 169.5090, GNorm = 0.1979, lr_0 = 2.5048e-04
Loss = 3.1743e-03, PNorm = 169.5193, GNorm = 0.0867, lr_0 = 2.5031e-04
Loss = 6.2526e-03, PNorm = 169.5266, GNorm = 0.1624, lr_0 = 2.5014e-04
Loss = 4.7703e-03, PNorm = 169.5327, GNorm = 0.1807, lr_0 = 2.4997e-04
Loss = 4.5355e-03, PNorm = 169.5413, GNorm = 0.2380, lr_0 = 2.4980e-04
Loss = 4.4996e-03, PNorm = 169.5517, GNorm = 0.1431, lr_0 = 2.4963e-04
Loss = 5.1620e-03, PNorm = 169.5595, GNorm = 0.2133, lr_0 = 2.4946e-04
Loss = 3.6867e-03, PNorm = 169.5692, GNorm = 0.0859, lr_0 = 2.4929e-04
Loss = 3.7800e-03, PNorm = 169.5797, GNorm = 0.1103, lr_0 = 2.4911e-04
Loss = 4.0842e-03, PNorm = 169.5860, GNorm = 0.1003, lr_0 = 2.4894e-04
Loss = 3.4760e-03, PNorm = 169.5917, GNorm = 0.1803, lr_0 = 2.4877e-04
Loss = 4.1817e-03, PNorm = 169.5983, GNorm = 0.2715, lr_0 = 2.4860e-04
Loss = 5.0630e-03, PNorm = 169.6043, GNorm = 0.2014, lr_0 = 2.4843e-04
Loss = 4.7220e-03, PNorm = 169.6137, GNorm = 0.2427, lr_0 = 2.4826e-04
Loss = 4.7666e-03, PNorm = 169.6224, GNorm = 0.7173, lr_0 = 2.4809e-04
Loss = 3.6031e-03, PNorm = 169.6296, GNorm = 0.1525, lr_0 = 2.4792e-04
Loss = 4.2293e-03, PNorm = 169.6347, GNorm = 0.1110, lr_0 = 2.4775e-04
Loss = 3.5180e-03, PNorm = 169.6376, GNorm = 0.2373, lr_0 = 2.4758e-04
Loss = 5.4617e-03, PNorm = 169.6461, GNorm = 0.0927, lr_0 = 2.4741e-04
Loss = 4.3898e-03, PNorm = 169.6564, GNorm = 0.1065, lr_0 = 2.4724e-04
Loss = 4.9796e-03, PNorm = 169.6649, GNorm = 0.1964, lr_0 = 2.4707e-04
Validation mae = 0.278495
Epoch 19
Loss = 3.2710e-03, PNorm = 169.6709, GNorm = 0.2840, lr_0 = 2.4690e-04
Loss = 3.1030e-03, PNorm = 169.6746, GNorm = 0.2273, lr_0 = 2.4674e-04
Loss = 3.7681e-03, PNorm = 169.6769, GNorm = 0.0688, lr_0 = 2.4657e-04
Loss = 3.8325e-03, PNorm = 169.6796, GNorm = 0.1734, lr_0 = 2.4640e-04
Loss = 3.7754e-03, PNorm = 169.6844, GNorm = 0.0958, lr_0 = 2.4623e-04
Loss = 3.7235e-03, PNorm = 169.6940, GNorm = 0.1612, lr_0 = 2.4606e-04
Loss = 5.2426e-03, PNorm = 169.7015, GNorm = 0.1144, lr_0 = 2.4589e-04
Loss = 2.7476e-03, PNorm = 169.7076, GNorm = 0.1486, lr_0 = 2.4572e-04
Loss = 4.0298e-03, PNorm = 169.7174, GNorm = 0.1782, lr_0 = 2.4556e-04
Loss = 5.2022e-03, PNorm = 169.7246, GNorm = 0.2254, lr_0 = 2.4539e-04
Loss = 3.5974e-03, PNorm = 169.7308, GNorm = 0.5065, lr_0 = 2.4522e-04
Loss = 4.7108e-03, PNorm = 169.7400, GNorm = 0.1215, lr_0 = 2.4505e-04
Loss = 2.4807e-03, PNorm = 169.7458, GNorm = 0.1194, lr_0 = 2.4488e-04
Loss = 2.8800e-03, PNorm = 169.7520, GNorm = 0.1013, lr_0 = 2.4472e-04
Loss = 2.8100e-03, PNorm = 169.7568, GNorm = 0.1293, lr_0 = 2.4455e-04
Loss = 3.1127e-03, PNorm = 169.7637, GNorm = 0.1834, lr_0 = 2.4438e-04
Loss = 5.5117e-03, PNorm = 169.7702, GNorm = 0.3868, lr_0 = 2.4421e-04
Loss = 2.9717e-03, PNorm = 169.7774, GNorm = 0.3444, lr_0 = 2.4405e-04
Loss = 3.9453e-03, PNorm = 169.7816, GNorm = 0.2667, lr_0 = 2.4388e-04
Loss = 2.9751e-03, PNorm = 169.7867, GNorm = 0.1333, lr_0 = 2.4371e-04
Loss = 2.2439e-03, PNorm = 169.7902, GNorm = 0.1350, lr_0 = 2.4354e-04
Loss = 3.9952e-03, PNorm = 169.7937, GNorm = 0.4971, lr_0 = 2.4338e-04
Loss = 3.9357e-03, PNorm = 169.8015, GNorm = 0.1483, lr_0 = 2.4321e-04
Loss = 3.7802e-03, PNorm = 169.8096, GNorm = 0.0879, lr_0 = 2.4304e-04
Loss = 3.0089e-03, PNorm = 169.8177, GNorm = 0.0727, lr_0 = 2.4288e-04
Loss = 3.9884e-03, PNorm = 169.8244, GNorm = 0.0596, lr_0 = 2.4271e-04
Loss = 3.5440e-03, PNorm = 169.8290, GNorm = 0.2701, lr_0 = 2.4254e-04
Loss = 3.2885e-03, PNorm = 169.8343, GNorm = 0.1657, lr_0 = 2.4238e-04
Loss = 5.4762e-03, PNorm = 169.8408, GNorm = 0.1306, lr_0 = 2.4221e-04
Loss = 3.1589e-03, PNorm = 169.8456, GNorm = 0.1270, lr_0 = 2.4205e-04
Loss = 2.7776e-03, PNorm = 169.8507, GNorm = 0.1146, lr_0 = 2.4188e-04
Loss = 4.3317e-03, PNorm = 169.8539, GNorm = 0.0994, lr_0 = 2.4171e-04
Loss = 3.2742e-03, PNorm = 169.8586, GNorm = 0.1173, lr_0 = 2.4155e-04
Loss = 2.6642e-03, PNorm = 169.8636, GNorm = 0.2038, lr_0 = 2.4138e-04
Loss = 2.6150e-03, PNorm = 169.8685, GNorm = 0.1916, lr_0 = 2.4122e-04
Loss = 4.1799e-03, PNorm = 169.8748, GNorm = 0.1928, lr_0 = 2.4105e-04
Loss = 3.3797e-03, PNorm = 169.8796, GNorm = 0.2515, lr_0 = 2.4089e-04
Loss = 2.6914e-03, PNorm = 169.8846, GNorm = 0.2386, lr_0 = 2.4072e-04
Loss = 2.7280e-03, PNorm = 169.8889, GNorm = 0.0558, lr_0 = 2.4056e-04
Loss = 4.2014e-03, PNorm = 169.8936, GNorm = 0.0680, lr_0 = 2.4039e-04
Loss = 2.8892e-03, PNorm = 169.8987, GNorm = 0.1985, lr_0 = 2.4023e-04
Loss = 3.7585e-03, PNorm = 169.9061, GNorm = 0.1081, lr_0 = 2.4006e-04
Loss = 3.3980e-03, PNorm = 169.9130, GNorm = 0.1163, lr_0 = 2.3990e-04
Loss = 3.0036e-03, PNorm = 169.9201, GNorm = 0.2125, lr_0 = 2.3974e-04
Loss = 5.1928e-03, PNorm = 169.9247, GNorm = 0.2218, lr_0 = 2.3957e-04
Loss = 2.9583e-03, PNorm = 169.9329, GNorm = 0.1248, lr_0 = 2.3941e-04
Loss = 3.0904e-03, PNorm = 169.9379, GNorm = 0.2103, lr_0 = 2.3924e-04
Loss = 4.1764e-03, PNorm = 169.9435, GNorm = 0.0776, lr_0 = 2.3908e-04
Loss = 4.8618e-03, PNorm = 169.9491, GNorm = 0.0510, lr_0 = 2.3892e-04
Loss = 2.5798e-03, PNorm = 169.9559, GNorm = 0.3380, lr_0 = 2.3875e-04
Loss = 2.7612e-03, PNorm = 169.9639, GNorm = 0.1213, lr_0 = 2.3859e-04
Loss = 3.1291e-03, PNorm = 169.9705, GNorm = 0.2688, lr_0 = 2.3842e-04
Loss = 3.0001e-03, PNorm = 169.9782, GNorm = 0.2312, lr_0 = 2.3826e-04
Loss = 3.2057e-03, PNorm = 169.9841, GNorm = 0.1034, lr_0 = 2.3810e-04
Loss = 3.0892e-03, PNorm = 169.9901, GNorm = 0.1860, lr_0 = 2.3794e-04
Loss = 2.8000e-03, PNorm = 169.9956, GNorm = 0.1523, lr_0 = 2.3777e-04
Loss = 2.4476e-03, PNorm = 170.0014, GNorm = 0.1560, lr_0 = 2.3761e-04
Loss = 3.0022e-03, PNorm = 170.0085, GNorm = 0.1698, lr_0 = 2.3745e-04
Loss = 2.7445e-03, PNorm = 170.0141, GNorm = 0.2109, lr_0 = 2.3728e-04
Loss = 4.4292e-03, PNorm = 170.0189, GNorm = 0.2829, lr_0 = 2.3712e-04
Loss = 2.7633e-03, PNorm = 170.0255, GNorm = 0.4656, lr_0 = 2.3696e-04
Loss = 3.8539e-03, PNorm = 170.0330, GNorm = 0.1114, lr_0 = 2.3680e-04
Loss = 5.5737e-03, PNorm = 170.0418, GNorm = 0.2186, lr_0 = 2.3663e-04
Loss = 3.0708e-03, PNorm = 170.0498, GNorm = 0.5080, lr_0 = 2.3647e-04
Loss = 3.7963e-03, PNorm = 170.0558, GNorm = 0.1045, lr_0 = 2.3631e-04
Loss = 3.0655e-03, PNorm = 170.0613, GNorm = 0.4036, lr_0 = 2.3615e-04
Loss = 3.0390e-03, PNorm = 170.0649, GNorm = 0.1569, lr_0 = 2.3599e-04
Loss = 3.3251e-03, PNorm = 170.0719, GNorm = 0.1758, lr_0 = 2.3582e-04
Loss = 5.1360e-03, PNorm = 170.0786, GNorm = 0.2909, lr_0 = 2.3566e-04
Loss = 3.4078e-03, PNorm = 170.0869, GNorm = 0.0930, lr_0 = 2.3550e-04
Loss = 3.1525e-03, PNorm = 170.0943, GNorm = 0.2973, lr_0 = 2.3534e-04
Loss = 4.5036e-03, PNorm = 170.0997, GNorm = 0.1126, lr_0 = 2.3518e-04
Loss = 3.9834e-03, PNorm = 170.1025, GNorm = 0.4063, lr_0 = 2.3502e-04
Loss = 2.9538e-03, PNorm = 170.1070, GNorm = 0.3176, lr_0 = 2.3486e-04
Loss = 2.5727e-03, PNorm = 170.1143, GNorm = 0.4169, lr_0 = 2.3470e-04
Loss = 2.8455e-03, PNorm = 170.1197, GNorm = 0.1317, lr_0 = 2.3454e-04
Loss = 2.7151e-03, PNorm = 170.1258, GNorm = 0.0799, lr_0 = 2.3437e-04
Loss = 4.3225e-03, PNorm = 170.1320, GNorm = 0.2063, lr_0 = 2.3421e-04
Loss = 3.1744e-03, PNorm = 170.1396, GNorm = 0.3891, lr_0 = 2.3405e-04
Loss = 2.4290e-03, PNorm = 170.1474, GNorm = 0.1704, lr_0 = 2.3389e-04
Loss = 3.3096e-03, PNorm = 170.1552, GNorm = 0.1267, lr_0 = 2.3373e-04
Loss = 3.7744e-03, PNorm = 170.1611, GNorm = 0.1186, lr_0 = 2.3357e-04
Loss = 2.8795e-03, PNorm = 170.1667, GNorm = 0.3570, lr_0 = 2.3341e-04
Loss = 2.6107e-03, PNorm = 170.1746, GNorm = 0.2616, lr_0 = 2.3325e-04
Loss = 2.7560e-03, PNorm = 170.1816, GNorm = 0.1577, lr_0 = 2.3309e-04
Loss = 3.8642e-03, PNorm = 170.1881, GNorm = 0.1410, lr_0 = 2.3293e-04
Loss = 3.6958e-03, PNorm = 170.1963, GNorm = 0.3285, lr_0 = 2.3277e-04
Loss = 4.2701e-03, PNorm = 170.2045, GNorm = 0.1084, lr_0 = 2.3261e-04
Loss = 3.1876e-03, PNorm = 170.2085, GNorm = 0.3184, lr_0 = 2.3246e-04
Loss = 3.4876e-03, PNorm = 170.2133, GNorm = 0.1704, lr_0 = 2.3230e-04
Loss = 4.0224e-03, PNorm = 170.2165, GNorm = 0.2241, lr_0 = 2.3214e-04
Loss = 4.9196e-03, PNorm = 170.2233, GNorm = 0.2655, lr_0 = 2.3198e-04
Loss = 7.5218e-03, PNorm = 170.2313, GNorm = 0.0450, lr_0 = 2.3182e-04
Loss = 2.6082e-03, PNorm = 170.2385, GNorm = 0.1193, lr_0 = 2.3166e-04
Loss = 2.9790e-03, PNorm = 170.2472, GNorm = 0.5416, lr_0 = 2.3150e-04
Loss = 2.6302e-03, PNorm = 170.2526, GNorm = 0.0992, lr_0 = 2.3134e-04
Loss = 3.5217e-03, PNorm = 170.2568, GNorm = 0.0841, lr_0 = 2.3118e-04
Loss = 2.9241e-03, PNorm = 170.2612, GNorm = 0.1372, lr_0 = 2.3103e-04
Loss = 3.4080e-03, PNorm = 170.2702, GNorm = 0.0517, lr_0 = 2.3087e-04
Loss = 5.2515e-03, PNorm = 170.2750, GNorm = 0.0860, lr_0 = 2.3071e-04
Loss = 2.5560e-03, PNorm = 170.2807, GNorm = 0.1766, lr_0 = 2.3055e-04
Loss = 5.2123e-03, PNorm = 170.2832, GNorm = 0.3337, lr_0 = 2.3039e-04
Loss = 2.8046e-03, PNorm = 170.2914, GNorm = 0.0698, lr_0 = 2.3024e-04
Loss = 2.8791e-03, PNorm = 170.2967, GNorm = 0.0770, lr_0 = 2.3008e-04
Loss = 3.1000e-03, PNorm = 170.3045, GNorm = 0.2267, lr_0 = 2.2992e-04
Loss = 3.5458e-03, PNorm = 170.3106, GNorm = 0.1948, lr_0 = 2.2976e-04
Loss = 3.2503e-03, PNorm = 170.3158, GNorm = 0.0679, lr_0 = 2.2961e-04
Loss = 3.1096e-03, PNorm = 170.3215, GNorm = 0.1858, lr_0 = 2.2945e-04
Loss = 2.6607e-03, PNorm = 170.3303, GNorm = 0.0739, lr_0 = 2.2929e-04
Loss = 2.8576e-03, PNorm = 170.3380, GNorm = 0.1431, lr_0 = 2.2913e-04
Loss = 2.5355e-03, PNorm = 170.3429, GNorm = 0.1357, lr_0 = 2.2898e-04
Loss = 4.7406e-03, PNorm = 170.3503, GNorm = 0.0860, lr_0 = 2.2882e-04
Loss = 4.2575e-03, PNorm = 170.3565, GNorm = 0.1890, lr_0 = 2.2866e-04
Loss = 3.6624e-03, PNorm = 170.3619, GNorm = 0.2831, lr_0 = 2.2851e-04
Loss = 3.0956e-03, PNorm = 170.3687, GNorm = 0.1790, lr_0 = 2.2835e-04
Loss = 3.9642e-03, PNorm = 170.3756, GNorm = 0.2576, lr_0 = 2.2819e-04
Loss = 4.4744e-03, PNorm = 170.3847, GNorm = 0.0806, lr_0 = 2.2804e-04
Loss = 5.0301e-03, PNorm = 170.3941, GNorm = 0.2346, lr_0 = 2.2788e-04
Loss = 2.4282e-03, PNorm = 170.4046, GNorm = 0.1011, lr_0 = 2.2773e-04
Loss = 4.0668e-03, PNorm = 170.4118, GNorm = 0.0962, lr_0 = 2.2757e-04
Validation mae = 0.278791
Epoch 20
Loss = 2.2529e-03, PNorm = 170.4157, GNorm = 0.0926, lr_0 = 2.2741e-04
Loss = 2.7274e-03, PNorm = 170.4183, GNorm = 0.2579, lr_0 = 2.2726e-04
Loss = 3.3833e-03, PNorm = 170.4201, GNorm = 0.1597, lr_0 = 2.2710e-04
Loss = 2.5936e-03, PNorm = 170.4232, GNorm = 0.1515, lr_0 = 2.2695e-04
Loss = 3.4135e-03, PNorm = 170.4268, GNorm = 0.1533, lr_0 = 2.2679e-04
Loss = 2.7209e-03, PNorm = 170.4329, GNorm = 0.1484, lr_0 = 2.2664e-04
Loss = 3.6489e-03, PNorm = 170.4400, GNorm = 0.1299, lr_0 = 2.2648e-04
Loss = 2.0796e-03, PNorm = 170.4465, GNorm = 0.1015, lr_0 = 2.2632e-04
Loss = 3.3540e-03, PNorm = 170.4517, GNorm = 0.1154, lr_0 = 2.2617e-04
Loss = 2.5753e-03, PNorm = 170.4559, GNorm = 0.0964, lr_0 = 2.2601e-04
Loss = 2.8054e-03, PNorm = 170.4593, GNorm = 0.0741, lr_0 = 2.2586e-04
Loss = 2.8250e-03, PNorm = 170.4635, GNorm = 0.0880, lr_0 = 2.2571e-04
Loss = 2.7813e-03, PNorm = 170.4695, GNorm = 0.0725, lr_0 = 2.2555e-04
Loss = 2.4777e-03, PNorm = 170.4755, GNorm = 0.0721, lr_0 = 2.2540e-04
Loss = 3.1008e-03, PNorm = 170.4787, GNorm = 0.1244, lr_0 = 2.2524e-04
Loss = 2.3358e-03, PNorm = 170.4827, GNorm = 0.1791, lr_0 = 2.2509e-04
Loss = 2.2401e-03, PNorm = 170.4868, GNorm = 0.2003, lr_0 = 2.2493e-04
Loss = 2.0964e-03, PNorm = 170.4925, GNorm = 0.1446, lr_0 = 2.2478e-04
Loss = 2.4912e-03, PNorm = 170.4974, GNorm = 0.4016, lr_0 = 2.2463e-04
Loss = 2.2028e-03, PNorm = 170.5044, GNorm = 0.1559, lr_0 = 2.2447e-04
Loss = 3.3440e-03, PNorm = 170.5100, GNorm = 0.0815, lr_0 = 2.2432e-04
Loss = 3.7932e-03, PNorm = 170.5171, GNorm = 0.1037, lr_0 = 2.2416e-04
Loss = 4.5305e-03, PNorm = 170.5221, GNorm = 0.1813, lr_0 = 2.2401e-04
Loss = 2.2084e-03, PNorm = 170.5258, GNorm = 0.1630, lr_0 = 2.2386e-04
Loss = 2.5680e-03, PNorm = 170.5316, GNorm = 0.4567, lr_0 = 2.2370e-04
Loss = 3.3145e-03, PNorm = 170.5346, GNorm = 0.1027, lr_0 = 2.2355e-04
Loss = 2.3734e-03, PNorm = 170.5386, GNorm = 0.1099, lr_0 = 2.2340e-04
Loss = 2.3496e-03, PNorm = 170.5415, GNorm = 0.1002, lr_0 = 2.2324e-04
Loss = 2.5662e-03, PNorm = 170.5455, GNorm = 0.0703, lr_0 = 2.2309e-04
Loss = 2.2872e-03, PNorm = 170.5509, GNorm = 0.0902, lr_0 = 2.2294e-04
Loss = 2.8470e-03, PNorm = 170.5557, GNorm = 0.1556, lr_0 = 2.2279e-04
Loss = 2.4731e-03, PNorm = 170.5588, GNorm = 0.0708, lr_0 = 2.2263e-04
Loss = 3.0169e-03, PNorm = 170.5632, GNorm = 0.0941, lr_0 = 2.2248e-04
Loss = 2.8336e-03, PNorm = 170.5681, GNorm = 0.2377, lr_0 = 2.2233e-04
Loss = 3.1859e-03, PNorm = 170.5718, GNorm = 0.0599, lr_0 = 2.2218e-04
Loss = 2.7090e-03, PNorm = 170.5744, GNorm = 0.2913, lr_0 = 2.2202e-04
Loss = 3.5260e-03, PNorm = 170.5772, GNorm = 0.1978, lr_0 = 2.2187e-04
Loss = 3.3005e-03, PNorm = 170.5816, GNorm = 0.2538, lr_0 = 2.2172e-04
Loss = 2.8895e-03, PNorm = 170.5876, GNorm = 0.1480, lr_0 = 2.2157e-04
Loss = 2.6820e-03, PNorm = 170.5915, GNorm = 0.1957, lr_0 = 2.2142e-04
Loss = 2.7429e-03, PNorm = 170.5962, GNorm = 0.1344, lr_0 = 2.2126e-04
Loss = 4.1506e-03, PNorm = 170.6012, GNorm = 0.1126, lr_0 = 2.2111e-04
Loss = 2.3362e-03, PNorm = 170.6074, GNorm = 0.0652, lr_0 = 2.2096e-04
Loss = 2.9232e-03, PNorm = 170.6109, GNorm = 0.1040, lr_0 = 2.2081e-04
Loss = 3.0269e-03, PNorm = 170.6157, GNorm = 0.2608, lr_0 = 2.2066e-04
Loss = 3.3243e-03, PNorm = 170.6217, GNorm = 0.1295, lr_0 = 2.2051e-04
Loss = 3.3406e-03, PNorm = 170.6281, GNorm = 0.1854, lr_0 = 2.2036e-04
Loss = 3.2659e-03, PNorm = 170.6310, GNorm = 0.1244, lr_0 = 2.2021e-04
Loss = 2.4919e-03, PNorm = 170.6361, GNorm = 0.1924, lr_0 = 2.2005e-04
Loss = 2.5457e-03, PNorm = 170.6401, GNorm = 0.1690, lr_0 = 2.1990e-04
Loss = 2.2980e-03, PNorm = 170.6443, GNorm = 0.1614, lr_0 = 2.1975e-04
Loss = 4.8989e-03, PNorm = 170.6477, GNorm = 0.4859, lr_0 = 2.1960e-04
Loss = 2.8614e-03, PNorm = 170.6555, GNorm = 0.2837, lr_0 = 2.1945e-04
Loss = 4.9581e-03, PNorm = 170.6614, GNorm = 0.1268, lr_0 = 2.1930e-04
Loss = 3.4825e-03, PNorm = 170.6672, GNorm = 0.1084, lr_0 = 2.1915e-04
Loss = 2.3018e-03, PNorm = 170.6729, GNorm = 0.2095, lr_0 = 2.1900e-04
Loss = 2.6123e-03, PNorm = 170.6804, GNorm = 0.1815, lr_0 = 2.1885e-04
Loss = 5.4856e-03, PNorm = 170.6869, GNorm = 0.1492, lr_0 = 2.1870e-04
Loss = 2.9734e-03, PNorm = 170.6938, GNorm = 0.2429, lr_0 = 2.1855e-04
Loss = 2.2366e-03, PNorm = 170.7009, GNorm = 0.1327, lr_0 = 2.1840e-04
Loss = 3.3807e-03, PNorm = 170.7071, GNorm = 0.2686, lr_0 = 2.1825e-04
Loss = 2.7783e-03, PNorm = 170.7148, GNorm = 0.0594, lr_0 = 2.1810e-04
Loss = 4.8367e-03, PNorm = 170.7199, GNorm = 0.1065, lr_0 = 2.1795e-04
Loss = 3.9581e-03, PNorm = 170.7231, GNorm = 0.0812, lr_0 = 2.1780e-04
Loss = 2.1301e-03, PNorm = 170.7281, GNorm = 0.0593, lr_0 = 2.1765e-04
Loss = 2.3147e-03, PNorm = 170.7325, GNorm = 0.1991, lr_0 = 2.1751e-04
Loss = 2.2863e-03, PNorm = 170.7389, GNorm = 0.1535, lr_0 = 2.1736e-04
Loss = 3.0206e-03, PNorm = 170.7444, GNorm = 0.0534, lr_0 = 2.1721e-04
Loss = 3.9034e-03, PNorm = 170.7499, GNorm = 0.0909, lr_0 = 2.1706e-04
Loss = 2.4973e-03, PNorm = 170.7546, GNorm = 0.2319, lr_0 = 2.1691e-04
Loss = 2.5622e-03, PNorm = 170.7605, GNorm = 0.2395, lr_0 = 2.1676e-04
Loss = 2.1648e-03, PNorm = 170.7655, GNorm = 0.1302, lr_0 = 2.1661e-04
Loss = 2.0583e-03, PNorm = 170.7699, GNorm = 0.1237, lr_0 = 2.1646e-04
Loss = 2.8786e-03, PNorm = 170.7741, GNorm = 0.2000, lr_0 = 2.1632e-04
Loss = 2.5766e-03, PNorm = 170.7790, GNorm = 0.1342, lr_0 = 2.1617e-04
Loss = 2.7487e-03, PNorm = 170.7853, GNorm = 0.3585, lr_0 = 2.1602e-04
Loss = 2.6990e-03, PNorm = 170.7908, GNorm = 0.0647, lr_0 = 2.1587e-04
Loss = 3.6995e-03, PNorm = 170.7943, GNorm = 0.1336, lr_0 = 2.1572e-04
Loss = 4.0428e-03, PNorm = 170.7975, GNorm = 0.0608, lr_0 = 2.1558e-04
Loss = 3.8928e-03, PNorm = 170.8031, GNorm = 0.0647, lr_0 = 2.1543e-04
Loss = 2.6153e-03, PNorm = 170.8093, GNorm = 0.1185, lr_0 = 2.1528e-04
Loss = 2.5590e-03, PNorm = 170.8154, GNorm = 0.0655, lr_0 = 2.1513e-04
Loss = 2.3782e-03, PNorm = 170.8196, GNorm = 0.0868, lr_0 = 2.1499e-04
Loss = 2.7733e-03, PNorm = 170.8250, GNorm = 0.0568, lr_0 = 2.1484e-04
Loss = 5.1270e-03, PNorm = 170.8315, GNorm = 0.2075, lr_0 = 2.1469e-04
Loss = 2.9803e-03, PNorm = 170.8377, GNorm = 0.1433, lr_0 = 2.1454e-04
Loss = 2.3428e-03, PNorm = 170.8439, GNorm = 0.2694, lr_0 = 2.1440e-04
Loss = 2.4914e-03, PNorm = 170.8507, GNorm = 0.2090, lr_0 = 2.1425e-04
Loss = 2.6008e-03, PNorm = 170.8565, GNorm = 0.0997, lr_0 = 2.1410e-04
Loss = 2.7261e-03, PNorm = 170.8616, GNorm = 0.1411, lr_0 = 2.1396e-04
Loss = 2.2610e-03, PNorm = 170.8675, GNorm = 0.1147, lr_0 = 2.1381e-04
Loss = 6.9543e-03, PNorm = 170.8686, GNorm = 0.2849, lr_0 = 2.1366e-04
Loss = 2.9718e-03, PNorm = 170.8720, GNorm = 0.1111, lr_0 = 2.1352e-04
Loss = 3.2875e-03, PNorm = 170.8768, GNorm = 0.2366, lr_0 = 2.1337e-04
Loss = 3.1296e-03, PNorm = 170.8841, GNorm = 0.1366, lr_0 = 2.1323e-04
Loss = 4.7134e-03, PNorm = 170.8892, GNorm = 0.2173, lr_0 = 2.1308e-04
Loss = 2.9188e-03, PNorm = 170.8957, GNorm = 0.0855, lr_0 = 2.1293e-04
Loss = 2.8109e-03, PNorm = 170.9020, GNorm = 0.1429, lr_0 = 2.1279e-04
Loss = 3.9486e-03, PNorm = 170.9078, GNorm = 0.2420, lr_0 = 2.1264e-04
Loss = 4.6346e-03, PNorm = 170.9131, GNorm = 0.3831, lr_0 = 2.1250e-04
Loss = 3.6519e-03, PNorm = 170.9185, GNorm = 0.0821, lr_0 = 2.1235e-04
Loss = 3.8253e-03, PNorm = 170.9223, GNorm = 0.2332, lr_0 = 2.1221e-04
Loss = 3.8256e-03, PNorm = 170.9286, GNorm = 0.2108, lr_0 = 2.1206e-04
Loss = 2.5792e-03, PNorm = 170.9342, GNorm = 0.2740, lr_0 = 2.1191e-04
Loss = 2.2271e-03, PNorm = 170.9407, GNorm = 0.1595, lr_0 = 2.1177e-04
Loss = 4.9253e-03, PNorm = 170.9470, GNorm = 0.1475, lr_0 = 2.1162e-04
Loss = 5.2832e-03, PNorm = 170.9533, GNorm = 0.3333, lr_0 = 2.1148e-04
Loss = 2.4451e-03, PNorm = 170.9585, GNorm = 0.1042, lr_0 = 2.1133e-04
Loss = 3.3448e-03, PNorm = 170.9631, GNorm = 0.1415, lr_0 = 2.1119e-04
Loss = 3.0359e-03, PNorm = 170.9685, GNorm = 0.0632, lr_0 = 2.1104e-04
Loss = 2.4025e-03, PNorm = 170.9730, GNorm = 0.1823, lr_0 = 2.1090e-04
Loss = 2.7339e-03, PNorm = 170.9798, GNorm = 0.1759, lr_0 = 2.1076e-04
Loss = 2.8607e-03, PNorm = 170.9849, GNorm = 0.1779, lr_0 = 2.1061e-04
Loss = 2.6119e-03, PNorm = 170.9918, GNorm = 0.1290, lr_0 = 2.1047e-04
Loss = 2.0421e-03, PNorm = 170.9973, GNorm = 0.1012, lr_0 = 2.1032e-04
Loss = 3.2939e-03, PNorm = 171.0007, GNorm = 0.1084, lr_0 = 2.1018e-04
Loss = 4.1770e-03, PNorm = 171.0080, GNorm = 0.0625, lr_0 = 2.1003e-04
Loss = 2.5635e-03, PNorm = 171.0151, GNorm = 0.2993, lr_0 = 2.0989e-04
Loss = 2.7390e-03, PNorm = 171.0196, GNorm = 0.1881, lr_0 = 2.0975e-04
Loss = 2.3371e-03, PNorm = 171.0248, GNorm = 0.1431, lr_0 = 2.0960e-04
Validation mae = 0.278303
Epoch 21
Loss = 2.5780e-03, PNorm = 171.0302, GNorm = 0.0590, lr_0 = 2.0946e-04
Loss = 2.1723e-03, PNorm = 171.0335, GNorm = 0.0919, lr_0 = 2.0932e-04
Loss = 2.1270e-03, PNorm = 171.0385, GNorm = 0.2603, lr_0 = 2.0917e-04
Loss = 3.1367e-03, PNorm = 171.0428, GNorm = 0.0711, lr_0 = 2.0903e-04
Loss = 2.1139e-03, PNorm = 171.0442, GNorm = 0.1661, lr_0 = 2.0889e-04
Loss = 1.9671e-03, PNorm = 171.0469, GNorm = 0.1557, lr_0 = 2.0874e-04
Loss = 3.4618e-03, PNorm = 171.0502, GNorm = 0.1513, lr_0 = 2.0860e-04
Loss = 1.7519e-03, PNorm = 171.0535, GNorm = 0.0931, lr_0 = 2.0846e-04
Loss = 2.2739e-03, PNorm = 171.0564, GNorm = 0.1008, lr_0 = 2.0831e-04
Loss = 2.5237e-03, PNorm = 171.0606, GNorm = 0.1638, lr_0 = 2.0817e-04
Loss = 2.8099e-03, PNorm = 171.0648, GNorm = 0.3008, lr_0 = 2.0803e-04
Loss = 2.1167e-03, PNorm = 171.0697, GNorm = 0.1010, lr_0 = 2.0789e-04
Loss = 2.0408e-03, PNorm = 171.0748, GNorm = 0.1473, lr_0 = 2.0774e-04
Loss = 2.2968e-03, PNorm = 171.0772, GNorm = 0.1262, lr_0 = 2.0760e-04
Loss = 2.3677e-03, PNorm = 171.0795, GNorm = 0.0593, lr_0 = 2.0746e-04
Loss = 1.8079e-03, PNorm = 171.0821, GNorm = 0.1308, lr_0 = 2.0732e-04
Loss = 3.3048e-03, PNorm = 171.0860, GNorm = 0.1958, lr_0 = 2.0718e-04
Loss = 2.1762e-03, PNorm = 171.0904, GNorm = 0.1501, lr_0 = 2.0703e-04
Loss = 2.8051e-03, PNorm = 171.0948, GNorm = 0.1511, lr_0 = 2.0689e-04
Loss = 3.4182e-03, PNorm = 171.0993, GNorm = 0.1505, lr_0 = 2.0675e-04
Loss = 2.8510e-03, PNorm = 171.1034, GNorm = 0.1121, lr_0 = 2.0661e-04
Loss = 1.9189e-03, PNorm = 171.1072, GNorm = 0.2151, lr_0 = 2.0647e-04
Loss = 2.6384e-03, PNorm = 171.1085, GNorm = 0.0710, lr_0 = 2.0633e-04
Loss = 2.1632e-03, PNorm = 171.1098, GNorm = 0.1055, lr_0 = 2.0618e-04
Loss = 2.1154e-03, PNorm = 171.1132, GNorm = 0.1571, lr_0 = 2.0604e-04
Loss = 2.0801e-03, PNorm = 171.1172, GNorm = 0.1848, lr_0 = 2.0590e-04
Loss = 2.2122e-03, PNorm = 171.1226, GNorm = 0.0874, lr_0 = 2.0576e-04
Loss = 3.8488e-03, PNorm = 171.1269, GNorm = 0.1921, lr_0 = 2.0562e-04
Loss = 2.0981e-03, PNorm = 171.1295, GNorm = 0.0884, lr_0 = 2.0548e-04
Loss = 3.4943e-03, PNorm = 171.1338, GNorm = 0.1578, lr_0 = 2.0534e-04
Loss = 2.8777e-03, PNorm = 171.1428, GNorm = 0.1750, lr_0 = 2.0520e-04
Loss = 1.9167e-03, PNorm = 171.1500, GNorm = 0.0461, lr_0 = 2.0506e-04
Loss = 2.7708e-03, PNorm = 171.1529, GNorm = 0.1666, lr_0 = 2.0492e-04
Loss = 2.4686e-03, PNorm = 171.1575, GNorm = 0.0703, lr_0 = 2.0478e-04
Loss = 2.2231e-03, PNorm = 171.1628, GNorm = 0.1304, lr_0 = 2.0464e-04
Loss = 4.5529e-03, PNorm = 171.1661, GNorm = 0.1268, lr_0 = 2.0450e-04
Loss = 2.3879e-03, PNorm = 171.1716, GNorm = 0.1758, lr_0 = 2.0436e-04
Loss = 1.8685e-03, PNorm = 171.1755, GNorm = 0.1078, lr_0 = 2.0422e-04
Loss = 2.0843e-03, PNorm = 171.1796, GNorm = 0.2248, lr_0 = 2.0408e-04
Loss = 2.3916e-03, PNorm = 171.1854, GNorm = 0.0885, lr_0 = 2.0394e-04
Loss = 2.5360e-03, PNorm = 171.1900, GNorm = 0.2144, lr_0 = 2.0380e-04
Loss = 2.0358e-03, PNorm = 171.1939, GNorm = 0.1608, lr_0 = 2.0366e-04
Loss = 3.9982e-03, PNorm = 171.1989, GNorm = 0.1282, lr_0 = 2.0352e-04
Loss = 2.2188e-03, PNorm = 171.2005, GNorm = 0.1050, lr_0 = 2.0338e-04
Loss = 2.8555e-03, PNorm = 171.2062, GNorm = 0.0574, lr_0 = 2.0324e-04
Loss = 2.4705e-03, PNorm = 171.2125, GNorm = 0.0874, lr_0 = 2.0310e-04
Loss = 2.2163e-03, PNorm = 171.2168, GNorm = 0.0851, lr_0 = 2.0296e-04
Loss = 2.6405e-03, PNorm = 171.2212, GNorm = 0.1363, lr_0 = 2.0282e-04
Loss = 2.1851e-03, PNorm = 171.2255, GNorm = 0.1044, lr_0 = 2.0268e-04
Loss = 2.2978e-03, PNorm = 171.2291, GNorm = 0.1009, lr_0 = 2.0254e-04
Loss = 2.0141e-03, PNorm = 171.2319, GNorm = 0.0695, lr_0 = 2.0240e-04
Loss = 2.0464e-03, PNorm = 171.2367, GNorm = 0.0650, lr_0 = 2.0227e-04
Loss = 1.7364e-03, PNorm = 171.2406, GNorm = 0.1121, lr_0 = 2.0213e-04
Loss = 2.4556e-03, PNorm = 171.2451, GNorm = 0.2305, lr_0 = 2.0199e-04
Loss = 2.5974e-03, PNorm = 171.2489, GNorm = 0.0861, lr_0 = 2.0185e-04
Loss = 4.6400e-03, PNorm = 171.2534, GNorm = 0.1269, lr_0 = 2.0171e-04
Loss = 2.2639e-03, PNorm = 171.2590, GNorm = 0.0526, lr_0 = 2.0157e-04
Loss = 4.9377e-03, PNorm = 171.2650, GNorm = 0.0517, lr_0 = 2.0144e-04
Loss = 3.0854e-03, PNorm = 171.2690, GNorm = 0.1056, lr_0 = 2.0130e-04
Loss = 2.7097e-03, PNorm = 171.2727, GNorm = 0.1555, lr_0 = 2.0116e-04
Loss = 1.8164e-03, PNorm = 171.2785, GNorm = 0.1299, lr_0 = 2.0102e-04
Loss = 1.9334e-03, PNorm = 171.2834, GNorm = 0.1905, lr_0 = 2.0088e-04
Loss = 1.9539e-03, PNorm = 171.2880, GNorm = 0.1645, lr_0 = 2.0075e-04
Loss = 3.5699e-03, PNorm = 171.2913, GNorm = 0.1261, lr_0 = 2.0061e-04
Loss = 4.0202e-03, PNorm = 171.2975, GNorm = 0.2041, lr_0 = 2.0047e-04
Loss = 1.8664e-03, PNorm = 171.2996, GNorm = 0.2118, lr_0 = 2.0033e-04
Loss = 2.1225e-03, PNorm = 171.3034, GNorm = 0.1059, lr_0 = 2.0020e-04
Loss = 3.0277e-03, PNorm = 171.3081, GNorm = 0.1195, lr_0 = 2.0006e-04
Loss = 2.6438e-03, PNorm = 171.3147, GNorm = 0.1903, lr_0 = 1.9992e-04
Loss = 2.0167e-03, PNorm = 171.3189, GNorm = 0.1880, lr_0 = 1.9979e-04
Loss = 3.0159e-03, PNorm = 171.3225, GNorm = 0.0952, lr_0 = 1.9965e-04
Loss = 2.8394e-03, PNorm = 171.3263, GNorm = 0.2168, lr_0 = 1.9951e-04
Loss = 2.1496e-03, PNorm = 171.3306, GNorm = 0.0634, lr_0 = 1.9938e-04
Loss = 4.1001e-03, PNorm = 171.3354, GNorm = 0.6916, lr_0 = 1.9924e-04
Loss = 2.2169e-03, PNorm = 171.3389, GNorm = 0.2565, lr_0 = 1.9910e-04
Loss = 2.7773e-03, PNorm = 171.3428, GNorm = 0.0953, lr_0 = 1.9897e-04
Loss = 2.2836e-03, PNorm = 171.3474, GNorm = 0.0843, lr_0 = 1.9883e-04
Loss = 4.3663e-03, PNorm = 171.3534, GNorm = 0.1229, lr_0 = 1.9869e-04
Loss = 5.2730e-03, PNorm = 171.3589, GNorm = 0.1762, lr_0 = 1.9856e-04
Loss = 2.0698e-03, PNorm = 171.3645, GNorm = 0.1630, lr_0 = 1.9842e-04
Loss = 4.8359e-03, PNorm = 171.3678, GNorm = 0.0998, lr_0 = 1.9829e-04
Loss = 3.6906e-03, PNorm = 171.3745, GNorm = 0.2231, lr_0 = 1.9815e-04
Loss = 2.4590e-03, PNorm = 171.3789, GNorm = 0.3724, lr_0 = 1.9801e-04
Loss = 2.4720e-03, PNorm = 171.3833, GNorm = 0.1891, lr_0 = 1.9788e-04
Loss = 1.9813e-03, PNorm = 171.3889, GNorm = 0.0850, lr_0 = 1.9774e-04
Loss = 2.1868e-03, PNorm = 171.3925, GNorm = 0.0664, lr_0 = 1.9761e-04
Loss = 1.9867e-03, PNorm = 171.3951, GNorm = 0.1864, lr_0 = 1.9747e-04
Loss = 2.2619e-03, PNorm = 171.4001, GNorm = 0.1103, lr_0 = 1.9734e-04
Loss = 1.9053e-03, PNorm = 171.4040, GNorm = 0.0932, lr_0 = 1.9720e-04
Loss = 4.0134e-03, PNorm = 171.4110, GNorm = 0.2814, lr_0 = 1.9707e-04
Loss = 1.7919e-03, PNorm = 171.4163, GNorm = 0.0940, lr_0 = 1.9693e-04
Loss = 1.9614e-03, PNorm = 171.4223, GNorm = 0.0949, lr_0 = 1.9680e-04
Loss = 3.6790e-03, PNorm = 171.4269, GNorm = 0.0987, lr_0 = 1.9666e-04
Loss = 2.2923e-03, PNorm = 171.4322, GNorm = 0.2181, lr_0 = 1.9653e-04
Loss = 2.1508e-03, PNorm = 171.4354, GNorm = 0.1272, lr_0 = 1.9639e-04
Loss = 1.7785e-03, PNorm = 171.4382, GNorm = 0.1786, lr_0 = 1.9626e-04
Loss = 1.7968e-03, PNorm = 171.4424, GNorm = 0.0775, lr_0 = 1.9612e-04
Loss = 2.9340e-03, PNorm = 171.4474, GNorm = 0.1210, lr_0 = 1.9599e-04
Loss = 1.8477e-03, PNorm = 171.4518, GNorm = 0.0654, lr_0 = 1.9585e-04
Loss = 2.4835e-03, PNorm = 171.4586, GNorm = 0.1162, lr_0 = 1.9572e-04
Loss = 1.7387e-03, PNorm = 171.4626, GNorm = 0.1340, lr_0 = 1.9559e-04
Loss = 2.1102e-03, PNorm = 171.4657, GNorm = 0.0574, lr_0 = 1.9545e-04
Loss = 4.5743e-03, PNorm = 171.4707, GNorm = 0.0827, lr_0 = 1.9532e-04
Loss = 3.6106e-03, PNorm = 171.4755, GNorm = 0.0752, lr_0 = 1.9518e-04
Loss = 1.7093e-03, PNorm = 171.4811, GNorm = 0.1872, lr_0 = 1.9505e-04
Loss = 2.7067e-03, PNorm = 171.4864, GNorm = 0.2104, lr_0 = 1.9492e-04
Loss = 2.2443e-03, PNorm = 171.4905, GNorm = 0.1522, lr_0 = 1.9478e-04
Loss = 4.0263e-03, PNorm = 171.4944, GNorm = 0.1833, lr_0 = 1.9465e-04
Loss = 4.6562e-03, PNorm = 171.4994, GNorm = 0.1497, lr_0 = 1.9452e-04
Loss = 3.8344e-03, PNorm = 171.5023, GNorm = 0.0950, lr_0 = 1.9438e-04
Loss = 2.1738e-03, PNorm = 171.5045, GNorm = 0.1500, lr_0 = 1.9425e-04
Loss = 3.3017e-03, PNorm = 171.5070, GNorm = 0.1144, lr_0 = 1.9412e-04
Loss = 3.7884e-03, PNorm = 171.5104, GNorm = 0.1158, lr_0 = 1.9398e-04
Loss = 2.5208e-03, PNorm = 171.5148, GNorm = 0.1482, lr_0 = 1.9385e-04
Loss = 1.7332e-03, PNorm = 171.5204, GNorm = 0.1299, lr_0 = 1.9372e-04
Loss = 2.4037e-03, PNorm = 171.5264, GNorm = 0.1307, lr_0 = 1.9359e-04
Loss = 2.6307e-03, PNorm = 171.5327, GNorm = 0.1557, lr_0 = 1.9345e-04
Loss = 3.5708e-03, PNorm = 171.5405, GNorm = 0.2049, lr_0 = 1.9332e-04
Loss = 3.4188e-03, PNorm = 171.5471, GNorm = 0.0889, lr_0 = 1.9319e-04
Loss = 2.2526e-03, PNorm = 171.5520, GNorm = 0.1684, lr_0 = 1.9306e-04
Validation mae = 0.278469
Epoch 22
Loss = 2.1281e-03, PNorm = 171.5559, GNorm = 0.0434, lr_0 = 1.9292e-04
Loss = 2.2142e-03, PNorm = 171.5594, GNorm = 0.1767, lr_0 = 1.9279e-04
Loss = 2.3980e-03, PNorm = 171.5629, GNorm = 0.1146, lr_0 = 1.9266e-04
Loss = 1.6213e-03, PNorm = 171.5659, GNorm = 0.0954, lr_0 = 1.9253e-04
Loss = 1.8916e-03, PNorm = 171.5690, GNorm = 0.1888, lr_0 = 1.9240e-04
Loss = 3.2553e-03, PNorm = 171.5698, GNorm = 0.0690, lr_0 = 1.9226e-04
Loss = 1.8178e-03, PNorm = 171.5728, GNorm = 0.0678, lr_0 = 1.9213e-04
Loss = 3.0580e-03, PNorm = 171.5775, GNorm = 0.3259, lr_0 = 1.9200e-04
Loss = 2.7544e-03, PNorm = 171.5815, GNorm = 0.0699, lr_0 = 1.9187e-04
Loss = 1.7593e-03, PNorm = 171.5846, GNorm = 0.3207, lr_0 = 1.9174e-04
Loss = 2.3809e-03, PNorm = 171.5889, GNorm = 0.1870, lr_0 = 1.9161e-04
Loss = 3.2056e-03, PNorm = 171.5915, GNorm = 0.4294, lr_0 = 1.9148e-04
Loss = 1.5626e-03, PNorm = 171.5962, GNorm = 0.0436, lr_0 = 1.9134e-04
Loss = 1.7602e-03, PNorm = 171.5991, GNorm = 0.0664, lr_0 = 1.9121e-04
Loss = 2.8222e-03, PNorm = 171.6040, GNorm = 0.1299, lr_0 = 1.9108e-04
Loss = 1.4798e-03, PNorm = 171.6068, GNorm = 0.0958, lr_0 = 1.9095e-04
Loss = 2.7217e-03, PNorm = 171.6093, GNorm = 0.0507, lr_0 = 1.9082e-04
Loss = 2.7227e-03, PNorm = 171.6132, GNorm = 0.0987, lr_0 = 1.9069e-04
Loss = 2.3507e-03, PNorm = 171.6155, GNorm = 0.1550, lr_0 = 1.9056e-04
Loss = 2.9689e-03, PNorm = 171.6196, GNorm = 0.2033, lr_0 = 1.9043e-04
Loss = 1.6674e-03, PNorm = 171.6223, GNorm = 0.1487, lr_0 = 1.9030e-04
Loss = 1.5346e-03, PNorm = 171.6240, GNorm = 0.1519, lr_0 = 1.9017e-04
Loss = 3.6207e-03, PNorm = 171.6271, GNorm = 0.1001, lr_0 = 1.9004e-04
Loss = 2.7059e-03, PNorm = 171.6305, GNorm = 0.1190, lr_0 = 1.8991e-04
Loss = 2.4021e-03, PNorm = 171.6343, GNorm = 0.0580, lr_0 = 1.8978e-04
Loss = 2.1352e-03, PNorm = 171.6376, GNorm = 0.1516, lr_0 = 1.8965e-04
Loss = 1.9059e-03, PNorm = 171.6410, GNorm = 0.1071, lr_0 = 1.8952e-04
Loss = 1.9959e-03, PNorm = 171.6448, GNorm = 0.2070, lr_0 = 1.8939e-04
Loss = 1.8397e-03, PNorm = 171.6480, GNorm = 0.1859, lr_0 = 1.8926e-04
Loss = 2.2644e-03, PNorm = 171.6528, GNorm = 0.2180, lr_0 = 1.8913e-04
Loss = 2.5292e-03, PNorm = 171.6565, GNorm = 0.0928, lr_0 = 1.8900e-04
Loss = 1.9841e-03, PNorm = 171.6605, GNorm = 0.2052, lr_0 = 1.8887e-04
Loss = 2.1838e-03, PNorm = 171.6656, GNorm = 0.1782, lr_0 = 1.8874e-04
Loss = 3.9949e-03, PNorm = 171.6725, GNorm = 0.2633, lr_0 = 1.8861e-04
Loss = 2.1912e-03, PNorm = 171.6774, GNorm = 0.2416, lr_0 = 1.8848e-04
Loss = 2.0317e-03, PNorm = 171.6797, GNorm = 0.1375, lr_0 = 1.8835e-04
Loss = 1.8843e-03, PNorm = 171.6820, GNorm = 0.1526, lr_0 = 1.8822e-04
Loss = 2.0056e-03, PNorm = 171.6861, GNorm = 0.0528, lr_0 = 1.8809e-04
Loss = 7.3981e-03, PNorm = 171.6921, GNorm = 0.2026, lr_0 = 1.8797e-04
Loss = 2.2058e-03, PNorm = 171.6974, GNorm = 0.0578, lr_0 = 1.8784e-04
Loss = 3.3448e-03, PNorm = 171.7016, GNorm = 0.2397, lr_0 = 1.8771e-04
Loss = 2.7625e-03, PNorm = 171.7068, GNorm = 0.1733, lr_0 = 1.8758e-04
Loss = 1.4379e-03, PNorm = 171.7110, GNorm = 0.1196, lr_0 = 1.8745e-04
Loss = 2.9932e-03, PNorm = 171.7130, GNorm = 0.0899, lr_0 = 1.8732e-04
Loss = 2.2282e-03, PNorm = 171.7162, GNorm = 0.1088, lr_0 = 1.8719e-04
Loss = 1.9566e-03, PNorm = 171.7189, GNorm = 0.1060, lr_0 = 1.8707e-04
Loss = 3.6694e-03, PNorm = 171.7213, GNorm = 0.1162, lr_0 = 1.8694e-04
Loss = 3.1673e-03, PNorm = 171.7243, GNorm = 0.1786, lr_0 = 1.8681e-04
Loss = 1.6969e-03, PNorm = 171.7261, GNorm = 0.0944, lr_0 = 1.8668e-04
Loss = 1.6267e-03, PNorm = 171.7286, GNorm = 0.2950, lr_0 = 1.8655e-04
Loss = 1.5881e-03, PNorm = 171.7338, GNorm = 0.1278, lr_0 = 1.8643e-04
Loss = 2.1802e-03, PNorm = 171.7370, GNorm = 0.2209, lr_0 = 1.8630e-04
Loss = 2.8730e-03, PNorm = 171.7385, GNorm = 0.0870, lr_0 = 1.8617e-04
Loss = 2.2488e-03, PNorm = 171.7424, GNorm = 0.0921, lr_0 = 1.8604e-04
Loss = 3.4965e-03, PNorm = 171.7428, GNorm = 0.2106, lr_0 = 1.8592e-04
Loss = 1.9262e-03, PNorm = 171.7471, GNorm = 0.1525, lr_0 = 1.8579e-04
Loss = 2.2135e-03, PNorm = 171.7509, GNorm = 0.0458, lr_0 = 1.8566e-04
Loss = 2.7700e-03, PNorm = 171.7530, GNorm = 0.1720, lr_0 = 1.8553e-04
Loss = 2.0091e-03, PNorm = 171.7565, GNorm = 0.1938, lr_0 = 1.8541e-04
Loss = 1.8227e-03, PNorm = 171.7622, GNorm = 0.0945, lr_0 = 1.8528e-04
Loss = 1.8191e-03, PNorm = 171.7674, GNorm = 0.2872, lr_0 = 1.8515e-04
Loss = 3.9042e-03, PNorm = 171.7722, GNorm = 0.0986, lr_0 = 1.8503e-04
Loss = 4.2972e-03, PNorm = 171.7759, GNorm = 0.3413, lr_0 = 1.8490e-04
Loss = 1.6941e-03, PNorm = 171.7801, GNorm = 0.2531, lr_0 = 1.8477e-04
Loss = 2.7580e-03, PNorm = 171.7822, GNorm = 0.1536, lr_0 = 1.8465e-04
Loss = 3.2860e-03, PNorm = 171.7867, GNorm = 0.1624, lr_0 = 1.8452e-04
Loss = 3.5243e-03, PNorm = 171.7893, GNorm = 0.2360, lr_0 = 1.8439e-04
Loss = 2.1595e-03, PNorm = 171.7907, GNorm = 0.0763, lr_0 = 1.8427e-04
Loss = 1.7742e-03, PNorm = 171.7910, GNorm = 0.2346, lr_0 = 1.8414e-04
Loss = 1.4705e-03, PNorm = 171.7960, GNorm = 0.1767, lr_0 = 1.8401e-04
Loss = 2.7378e-03, PNorm = 171.8013, GNorm = 0.0786, lr_0 = 1.8389e-04
Loss = 2.4931e-03, PNorm = 171.8046, GNorm = 0.1429, lr_0 = 1.8376e-04
Loss = 1.5909e-03, PNorm = 171.8087, GNorm = 0.0566, lr_0 = 1.8364e-04
Loss = 1.5504e-03, PNorm = 171.8124, GNorm = 0.0925, lr_0 = 1.8351e-04
Loss = 1.4853e-03, PNorm = 171.8166, GNorm = 0.0793, lr_0 = 1.8338e-04
Loss = 3.9874e-03, PNorm = 171.8226, GNorm = 0.1232, lr_0 = 1.8326e-04
Loss = 3.1601e-03, PNorm = 171.8266, GNorm = 0.0396, lr_0 = 1.8313e-04
Loss = 1.7167e-03, PNorm = 171.8311, GNorm = 0.0829, lr_0 = 1.8301e-04
Loss = 1.6747e-03, PNorm = 171.8369, GNorm = 0.0674, lr_0 = 1.8288e-04
Loss = 1.5819e-03, PNorm = 171.8407, GNorm = 0.1045, lr_0 = 1.8276e-04
Loss = 2.2806e-03, PNorm = 171.8427, GNorm = 0.1427, lr_0 = 1.8263e-04
Loss = 1.3866e-03, PNorm = 171.8447, GNorm = 0.1600, lr_0 = 1.8251e-04
Loss = 1.5009e-03, PNorm = 171.8483, GNorm = 0.1435, lr_0 = 1.8238e-04
Loss = 3.8995e-03, PNorm = 171.8527, GNorm = 0.0579, lr_0 = 1.8226e-04
Loss = 1.6704e-03, PNorm = 171.8566, GNorm = 0.0518, lr_0 = 1.8213e-04
Loss = 2.2442e-03, PNorm = 171.8596, GNorm = 0.1310, lr_0 = 1.8201e-04
Loss = 3.4543e-03, PNorm = 171.8642, GNorm = 0.0768, lr_0 = 1.8188e-04
Loss = 2.9737e-03, PNorm = 171.8681, GNorm = 0.1253, lr_0 = 1.8176e-04
Loss = 3.7134e-03, PNorm = 171.8739, GNorm = 0.1210, lr_0 = 1.8163e-04
Loss = 1.4462e-03, PNorm = 171.8773, GNorm = 0.0408, lr_0 = 1.8151e-04
Loss = 2.2911e-03, PNorm = 171.8793, GNorm = 0.1155, lr_0 = 1.8138e-04
Loss = 2.3019e-03, PNorm = 171.8839, GNorm = 0.0503, lr_0 = 1.8126e-04
Loss = 3.0197e-03, PNorm = 171.8881, GNorm = 0.1481, lr_0 = 1.8114e-04
Loss = 2.1625e-03, PNorm = 171.8928, GNorm = 0.0818, lr_0 = 1.8101e-04
Loss = 1.8163e-03, PNorm = 171.8961, GNorm = 0.0605, lr_0 = 1.8089e-04
Loss = 3.6263e-03, PNorm = 171.9013, GNorm = 0.0777, lr_0 = 1.8076e-04
Loss = 1.9718e-03, PNorm = 171.9060, GNorm = 0.1981, lr_0 = 1.8064e-04
Loss = 1.9686e-03, PNorm = 171.9094, GNorm = 0.0990, lr_0 = 1.8052e-04
Loss = 3.9532e-03, PNorm = 171.9129, GNorm = 0.1028, lr_0 = 1.8039e-04
Loss = 2.0039e-03, PNorm = 171.9166, GNorm = 0.1535, lr_0 = 1.8027e-04
Loss = 1.7879e-03, PNorm = 171.9202, GNorm = 0.1095, lr_0 = 1.8015e-04
Loss = 1.4983e-03, PNorm = 171.9242, GNorm = 0.0663, lr_0 = 1.8002e-04
Loss = 3.3641e-03, PNorm = 171.9287, GNorm = 0.1399, lr_0 = 1.7990e-04
Loss = 3.2648e-03, PNorm = 171.9308, GNorm = 0.0585, lr_0 = 1.7978e-04
Loss = 2.6583e-03, PNorm = 171.9357, GNorm = 0.1442, lr_0 = 1.7965e-04
Loss = 1.5012e-03, PNorm = 171.9391, GNorm = 0.0853, lr_0 = 1.7953e-04
Loss = 1.8749e-03, PNorm = 171.9441, GNorm = 0.1431, lr_0 = 1.7941e-04
Loss = 3.3362e-03, PNorm = 171.9456, GNorm = 0.0785, lr_0 = 1.7928e-04
Loss = 3.0326e-03, PNorm = 171.9493, GNorm = 0.1843, lr_0 = 1.7916e-04
Loss = 2.1149e-03, PNorm = 171.9533, GNorm = 0.1633, lr_0 = 1.7904e-04
Loss = 2.1712e-03, PNorm = 171.9585, GNorm = 0.1667, lr_0 = 1.7892e-04
Loss = 1.4713e-03, PNorm = 171.9649, GNorm = 0.1316, lr_0 = 1.7879e-04
Loss = 2.1145e-03, PNorm = 171.9683, GNorm = 0.1077, lr_0 = 1.7867e-04
Loss = 1.6785e-03, PNorm = 171.9732, GNorm = 0.0838, lr_0 = 1.7855e-04
Loss = 2.7764e-03, PNorm = 171.9778, GNorm = 0.1230, lr_0 = 1.7843e-04
Loss = 2.1924e-03, PNorm = 171.9821, GNorm = 0.0952, lr_0 = 1.7830e-04
Loss = 2.5174e-03, PNorm = 171.9873, GNorm = 0.1216, lr_0 = 1.7818e-04
Loss = 1.6807e-03, PNorm = 171.9927, GNorm = 0.0934, lr_0 = 1.7806e-04
Loss = 1.6124e-03, PNorm = 171.9979, GNorm = 0.1535, lr_0 = 1.7794e-04
Loss = 2.5843e-03, PNorm = 172.0021, GNorm = 0.0676, lr_0 = 1.7782e-04
Validation mae = 0.277847
Epoch 23
Loss = 2.0136e-03, PNorm = 172.0041, GNorm = 0.2791, lr_0 = 1.7769e-04
Loss = 1.8862e-03, PNorm = 172.0049, GNorm = 0.0466, lr_0 = 1.7757e-04
Loss = 1.6926e-03, PNorm = 172.0070, GNorm = 0.2261, lr_0 = 1.7745e-04
Loss = 1.7231e-03, PNorm = 172.0101, GNorm = 0.0654, lr_0 = 1.7733e-04
Loss = 1.5942e-03, PNorm = 172.0132, GNorm = 0.0618, lr_0 = 1.7721e-04
Loss = 1.7842e-03, PNorm = 172.0155, GNorm = 0.1326, lr_0 = 1.7709e-04
Loss = 2.9736e-03, PNorm = 172.0182, GNorm = 0.2665, lr_0 = 1.7696e-04
Loss = 1.2730e-03, PNorm = 172.0205, GNorm = 0.0525, lr_0 = 1.7684e-04
Loss = 1.8818e-03, PNorm = 172.0244, GNorm = 0.0998, lr_0 = 1.7672e-04
Loss = 1.5017e-03, PNorm = 172.0275, GNorm = 0.0443, lr_0 = 1.7660e-04
Loss = 3.1008e-03, PNorm = 172.0303, GNorm = 0.1489, lr_0 = 1.7648e-04
Loss = 1.6389e-03, PNorm = 172.0331, GNorm = 0.1385, lr_0 = 1.7636e-04
Loss = 1.3473e-03, PNorm = 172.0378, GNorm = 0.0558, lr_0 = 1.7624e-04
Loss = 3.3386e-03, PNorm = 172.0416, GNorm = 0.0438, lr_0 = 1.7612e-04
Loss = 2.1949e-03, PNorm = 172.0448, GNorm = 0.1889, lr_0 = 1.7600e-04
Loss = 2.3641e-03, PNorm = 172.0482, GNorm = 0.1785, lr_0 = 1.7588e-04
Loss = 3.2155e-03, PNorm = 172.0503, GNorm = 0.1877, lr_0 = 1.7576e-04
Loss = 2.3746e-03, PNorm = 172.0542, GNorm = 0.1381, lr_0 = 1.7564e-04
Loss = 1.9939e-03, PNorm = 172.0582, GNorm = 0.2023, lr_0 = 1.7552e-04
Loss = 2.3384e-03, PNorm = 172.0620, GNorm = 0.1852, lr_0 = 1.7540e-04
Loss = 1.4022e-03, PNorm = 172.0637, GNorm = 0.0894, lr_0 = 1.7528e-04
Loss = 1.2292e-03, PNorm = 172.0663, GNorm = 0.2723, lr_0 = 1.7516e-04
Loss = 1.4620e-03, PNorm = 172.0693, GNorm = 0.0560, lr_0 = 1.7504e-04
Loss = 1.3773e-03, PNorm = 172.0720, GNorm = 0.1497, lr_0 = 1.7492e-04
Loss = 1.8267e-03, PNorm = 172.0743, GNorm = 0.1123, lr_0 = 1.7480e-04
Loss = 1.6779e-03, PNorm = 172.0736, GNorm = 0.0652, lr_0 = 1.7468e-04
Loss = 3.4998e-03, PNorm = 172.0767, GNorm = 0.1034, lr_0 = 1.7456e-04
Loss = 1.6704e-03, PNorm = 172.0794, GNorm = 0.0790, lr_0 = 1.7444e-04
Loss = 2.1072e-03, PNorm = 172.0851, GNorm = 0.0549, lr_0 = 1.7432e-04
Loss = 1.6513e-03, PNorm = 172.0870, GNorm = 0.1247, lr_0 = 1.7420e-04
Loss = 2.9317e-03, PNorm = 172.0895, GNorm = 0.1229, lr_0 = 1.7408e-04
Loss = 1.4536e-03, PNorm = 172.0922, GNorm = 0.0688, lr_0 = 1.7396e-04
Loss = 3.2977e-03, PNorm = 172.0942, GNorm = 0.0923, lr_0 = 1.7384e-04
Loss = 2.2307e-03, PNorm = 172.0976, GNorm = 0.1261, lr_0 = 1.7372e-04
Loss = 2.3117e-03, PNorm = 172.1007, GNorm = 0.1378, lr_0 = 1.7360e-04
Loss = 2.1501e-03, PNorm = 172.1046, GNorm = 0.0618, lr_0 = 1.7348e-04
Loss = 1.9957e-03, PNorm = 172.1075, GNorm = 0.0974, lr_0 = 1.7336e-04
Loss = 1.2443e-03, PNorm = 172.1098, GNorm = 0.0945, lr_0 = 1.7325e-04
Loss = 3.3115e-03, PNorm = 172.1137, GNorm = 0.1792, lr_0 = 1.7313e-04
Loss = 1.9299e-03, PNorm = 172.1164, GNorm = 0.0535, lr_0 = 1.7301e-04
Loss = 1.3151e-03, PNorm = 172.1192, GNorm = 0.0966, lr_0 = 1.7289e-04
Loss = 2.8534e-03, PNorm = 172.1206, GNorm = 0.1112, lr_0 = 1.7277e-04
Loss = 1.7587e-03, PNorm = 172.1235, GNorm = 0.1369, lr_0 = 1.7265e-04
Loss = 1.6980e-03, PNorm = 172.1270, GNorm = 0.1135, lr_0 = 1.7253e-04
Loss = 1.9548e-03, PNorm = 172.1277, GNorm = 0.2407, lr_0 = 1.7242e-04
Loss = 2.2793e-03, PNorm = 172.1294, GNorm = 0.0679, lr_0 = 1.7230e-04
Loss = 1.5005e-03, PNorm = 172.1306, GNorm = 0.0651, lr_0 = 1.7218e-04
Loss = 1.1541e-03, PNorm = 172.1334, GNorm = 0.0284, lr_0 = 1.7206e-04
Loss = 1.6215e-03, PNorm = 172.1372, GNorm = 0.0949, lr_0 = 1.7194e-04
Loss = 2.1494e-03, PNorm = 172.1427, GNorm = 0.1525, lr_0 = 1.7183e-04
Loss = 1.8233e-03, PNorm = 172.1457, GNorm = 0.0873, lr_0 = 1.7171e-04
Loss = 2.0358e-03, PNorm = 172.1462, GNorm = 0.0502, lr_0 = 1.7159e-04
Loss = 1.4735e-03, PNorm = 172.1488, GNorm = 0.1407, lr_0 = 1.7147e-04
Loss = 2.4056e-03, PNorm = 172.1522, GNorm = 0.0780, lr_0 = 1.7136e-04
Loss = 2.3661e-03, PNorm = 172.1552, GNorm = 0.4253, lr_0 = 1.7124e-04
Loss = 2.3376e-03, PNorm = 172.1603, GNorm = 0.2588, lr_0 = 1.7112e-04
Loss = 1.7732e-03, PNorm = 172.1647, GNorm = 0.2916, lr_0 = 1.7100e-04
Loss = 1.5346e-03, PNorm = 172.1685, GNorm = 0.1472, lr_0 = 1.7089e-04
Loss = 1.5524e-03, PNorm = 172.1722, GNorm = 0.1823, lr_0 = 1.7077e-04
Loss = 1.4780e-03, PNorm = 172.1762, GNorm = 0.0312, lr_0 = 1.7065e-04
Loss = 3.3253e-03, PNorm = 172.1783, GNorm = 0.2266, lr_0 = 1.7054e-04
Loss = 2.7219e-03, PNorm = 172.1810, GNorm = 0.0602, lr_0 = 1.7042e-04
Loss = 4.3580e-03, PNorm = 172.1826, GNorm = 0.1694, lr_0 = 1.7030e-04
Loss = 1.5670e-03, PNorm = 172.1870, GNorm = 0.0409, lr_0 = 1.7019e-04
Loss = 1.6109e-03, PNorm = 172.1889, GNorm = 0.0347, lr_0 = 1.7007e-04
Loss = 1.5260e-03, PNorm = 172.1907, GNorm = 0.2430, lr_0 = 1.6995e-04
Loss = 1.3101e-03, PNorm = 172.1950, GNorm = 0.1925, lr_0 = 1.6984e-04
Loss = 2.6246e-03, PNorm = 172.1991, GNorm = 0.1200, lr_0 = 1.6972e-04
Loss = 3.9667e-03, PNorm = 172.2031, GNorm = 0.1160, lr_0 = 1.6960e-04
Loss = 2.9544e-03, PNorm = 172.2061, GNorm = 0.2157, lr_0 = 1.6949e-04
Loss = 1.7464e-03, PNorm = 172.2088, GNorm = 0.1000, lr_0 = 1.6937e-04
Loss = 1.6293e-03, PNorm = 172.2116, GNorm = 0.2339, lr_0 = 1.6926e-04
Loss = 5.6633e-03, PNorm = 172.2143, GNorm = 0.1863, lr_0 = 1.6914e-04
Loss = 1.4929e-03, PNorm = 172.2178, GNorm = 0.0910, lr_0 = 1.6902e-04
Loss = 1.8283e-03, PNorm = 172.2227, GNorm = 0.1634, lr_0 = 1.6891e-04
Loss = 1.6426e-03, PNorm = 172.2272, GNorm = 0.0720, lr_0 = 1.6879e-04
Loss = 2.0054e-03, PNorm = 172.2306, GNorm = 0.2318, lr_0 = 1.6868e-04
Loss = 1.1738e-03, PNorm = 172.2337, GNorm = 0.1451, lr_0 = 1.6856e-04
Loss = 1.8196e-03, PNorm = 172.2377, GNorm = 0.1921, lr_0 = 1.6845e-04
Loss = 1.1984e-03, PNorm = 172.2415, GNorm = 0.1186, lr_0 = 1.6833e-04
Loss = 2.1689e-03, PNorm = 172.2436, GNorm = 0.0702, lr_0 = 1.6821e-04
Loss = 1.3404e-03, PNorm = 172.2454, GNorm = 0.1461, lr_0 = 1.6810e-04
Loss = 1.3737e-03, PNorm = 172.2481, GNorm = 0.1397, lr_0 = 1.6798e-04
Loss = 1.2357e-03, PNorm = 172.2496, GNorm = 0.1311, lr_0 = 1.6787e-04
Loss = 2.5577e-03, PNorm = 172.2530, GNorm = 0.1857, lr_0 = 1.6775e-04
Loss = 1.2249e-03, PNorm = 172.2567, GNorm = 0.0839, lr_0 = 1.6764e-04
Loss = 1.8289e-03, PNorm = 172.2615, GNorm = 0.1969, lr_0 = 1.6752e-04
Loss = 3.6021e-03, PNorm = 172.2640, GNorm = 0.1320, lr_0 = 1.6741e-04
Loss = 2.8091e-03, PNorm = 172.2683, GNorm = 0.1794, lr_0 = 1.6729e-04
Loss = 1.4190e-03, PNorm = 172.2710, GNorm = 0.1361, lr_0 = 1.6718e-04
Loss = 3.9077e-03, PNorm = 172.2751, GNorm = 0.0538, lr_0 = 1.6707e-04
Loss = 2.3433e-03, PNorm = 172.2782, GNorm = 0.1570, lr_0 = 1.6695e-04
Loss = 2.9087e-03, PNorm = 172.2809, GNorm = 0.1017, lr_0 = 1.6684e-04
Loss = 4.4210e-03, PNorm = 172.2840, GNorm = 0.2379, lr_0 = 1.6672e-04
Loss = 1.6866e-03, PNorm = 172.2879, GNorm = 0.3456, lr_0 = 1.6661e-04
Loss = 2.5888e-03, PNorm = 172.2921, GNorm = 0.3866, lr_0 = 1.6649e-04
Loss = 4.8482e-03, PNorm = 172.2962, GNorm = 0.1014, lr_0 = 1.6638e-04
Loss = 2.2029e-03, PNorm = 172.2977, GNorm = 0.2062, lr_0 = 1.6627e-04
Loss = 2.5854e-03, PNorm = 172.3014, GNorm = 0.1408, lr_0 = 1.6615e-04
Loss = 1.7930e-03, PNorm = 172.3047, GNorm = 0.0583, lr_0 = 1.6604e-04
Loss = 2.1239e-03, PNorm = 172.3087, GNorm = 0.0554, lr_0 = 1.6592e-04
Loss = 1.7944e-03, PNorm = 172.3109, GNorm = 0.0454, lr_0 = 1.6581e-04
Loss = 1.6587e-03, PNorm = 172.3148, GNorm = 0.0651, lr_0 = 1.6570e-04
Loss = 2.4877e-03, PNorm = 172.3177, GNorm = 0.1777, lr_0 = 1.6558e-04
Loss = 1.6072e-03, PNorm = 172.3210, GNorm = 0.2851, lr_0 = 1.6547e-04
Loss = 1.5968e-03, PNorm = 172.3229, GNorm = 0.1717, lr_0 = 1.6536e-04
Loss = 2.9807e-03, PNorm = 172.3265, GNorm = 0.1341, lr_0 = 1.6524e-04
Loss = 1.3711e-03, PNorm = 172.3313, GNorm = 0.1086, lr_0 = 1.6513e-04
Loss = 2.1813e-03, PNorm = 172.3368, GNorm = 0.1146, lr_0 = 1.6502e-04
Loss = 1.3186e-03, PNorm = 172.3422, GNorm = 0.1286, lr_0 = 1.6490e-04
Loss = 1.5652e-03, PNorm = 172.3460, GNorm = 0.0571, lr_0 = 1.6479e-04
Loss = 1.6383e-03, PNorm = 172.3481, GNorm = 0.1033, lr_0 = 1.6468e-04
Loss = 1.6518e-03, PNorm = 172.3520, GNorm = 0.2147, lr_0 = 1.6457e-04
Loss = 2.4499e-03, PNorm = 172.3538, GNorm = 0.1501, lr_0 = 1.6445e-04
Loss = 4.2671e-03, PNorm = 172.3548, GNorm = 0.1930, lr_0 = 1.6434e-04
Loss = 1.0067e-03, PNorm = 172.3563, GNorm = 0.0578, lr_0 = 1.6423e-04
Loss = 3.5910e-03, PNorm = 172.3581, GNorm = 0.3046, lr_0 = 1.6412e-04
Loss = 1.2938e-03, PNorm = 172.3614, GNorm = 0.0722, lr_0 = 1.6400e-04
Loss = 2.0257e-03, PNorm = 172.3657, GNorm = 0.0896, lr_0 = 1.6389e-04
Loss = 1.6206e-03, PNorm = 172.3699, GNorm = 0.1246, lr_0 = 1.6378e-04
Validation mae = 0.278036
Epoch 24
Loss = 1.2188e-03, PNorm = 172.3724, GNorm = 0.0704, lr_0 = 1.6367e-04
Loss = 1.7869e-03, PNorm = 172.3740, GNorm = 0.0826, lr_0 = 1.6355e-04
Loss = 2.6508e-03, PNorm = 172.3758, GNorm = 0.0806, lr_0 = 1.6344e-04
Loss = 1.3130e-03, PNorm = 172.3790, GNorm = 0.0870, lr_0 = 1.6333e-04
Loss = 1.9438e-03, PNorm = 172.3809, GNorm = 0.2917, lr_0 = 1.6322e-04
Loss = 1.5721e-03, PNorm = 172.3843, GNorm = 0.0581, lr_0 = 1.6311e-04
Loss = 1.6817e-03, PNorm = 172.3863, GNorm = 0.0565, lr_0 = 1.6299e-04
Loss = 1.8431e-03, PNorm = 172.3876, GNorm = 0.1853, lr_0 = 1.6288e-04
Loss = 1.3347e-03, PNorm = 172.3891, GNorm = 0.0377, lr_0 = 1.6277e-04
Loss = 2.1306e-03, PNorm = 172.3920, GNorm = 0.1390, lr_0 = 1.6266e-04
Loss = 1.1524e-03, PNorm = 172.3952, GNorm = 0.1295, lr_0 = 1.6255e-04
Loss = 1.5837e-03, PNorm = 172.3983, GNorm = 0.1323, lr_0 = 1.6244e-04
Loss = 2.4701e-03, PNorm = 172.4011, GNorm = 0.1300, lr_0 = 1.6233e-04
Loss = 1.3582e-03, PNorm = 172.4042, GNorm = 0.1277, lr_0 = 1.6221e-04
Loss = 1.2252e-03, PNorm = 172.4068, GNorm = 0.1055, lr_0 = 1.6210e-04
Loss = 1.4647e-03, PNorm = 172.4078, GNorm = 0.0731, lr_0 = 1.6199e-04
Loss = 1.2325e-03, PNorm = 172.4115, GNorm = 0.0479, lr_0 = 1.6188e-04
Loss = 3.8219e-03, PNorm = 172.4154, GNorm = 0.0448, lr_0 = 1.6177e-04
Loss = 1.7269e-03, PNorm = 172.4188, GNorm = 0.1299, lr_0 = 1.6166e-04
Loss = 2.2316e-03, PNorm = 172.4218, GNorm = 0.1092, lr_0 = 1.6155e-04
Loss = 1.3243e-03, PNorm = 172.4245, GNorm = 0.0485, lr_0 = 1.6144e-04
Loss = 3.2649e-03, PNorm = 172.4273, GNorm = 0.3201, lr_0 = 1.6133e-04
Loss = 1.5901e-03, PNorm = 172.4314, GNorm = 0.0486, lr_0 = 1.6122e-04
Loss = 1.4321e-03, PNorm = 172.4334, GNorm = 0.1399, lr_0 = 1.6111e-04
Loss = 2.8227e-03, PNorm = 172.4360, GNorm = 0.0983, lr_0 = 1.6100e-04
Loss = 1.6614e-03, PNorm = 172.4375, GNorm = 0.0864, lr_0 = 1.6089e-04
Loss = 2.0680e-03, PNorm = 172.4402, GNorm = 0.1127, lr_0 = 1.6078e-04
Loss = 1.1890e-03, PNorm = 172.4430, GNorm = 0.1367, lr_0 = 1.6067e-04
Loss = 1.3128e-03, PNorm = 172.4459, GNorm = 0.1644, lr_0 = 1.6056e-04
Loss = 1.7828e-03, PNorm = 172.4499, GNorm = 0.1815, lr_0 = 1.6045e-04
Loss = 2.8757e-03, PNorm = 172.4534, GNorm = 0.0567, lr_0 = 1.6034e-04
Loss = 1.0825e-03, PNorm = 172.4552, GNorm = 0.0627, lr_0 = 1.6023e-04
Loss = 1.4655e-03, PNorm = 172.4560, GNorm = 0.0671, lr_0 = 1.6012e-04
Loss = 1.2386e-03, PNorm = 172.4573, GNorm = 0.1849, lr_0 = 1.6001e-04
Loss = 3.0328e-03, PNorm = 172.4586, GNorm = 0.0462, lr_0 = 1.5990e-04
Loss = 2.5925e-03, PNorm = 172.4616, GNorm = 0.3949, lr_0 = 1.5979e-04
Loss = 2.4396e-03, PNorm = 172.4662, GNorm = 0.3506, lr_0 = 1.5968e-04
Loss = 1.0200e-03, PNorm = 172.4686, GNorm = 0.0299, lr_0 = 1.5957e-04
Loss = 1.3647e-03, PNorm = 172.4707, GNorm = 0.1152, lr_0 = 1.5946e-04
Loss = 1.1575e-03, PNorm = 172.4719, GNorm = 0.1948, lr_0 = 1.5935e-04
Loss = 3.0219e-03, PNorm = 172.4747, GNorm = 0.1200, lr_0 = 1.5924e-04
Loss = 1.8252e-03, PNorm = 172.4759, GNorm = 0.0807, lr_0 = 1.5913e-04
Loss = 3.0745e-03, PNorm = 172.4806, GNorm = 0.1854, lr_0 = 1.5902e-04
Loss = 1.8198e-03, PNorm = 172.4846, GNorm = 0.1567, lr_0 = 1.5891e-04
Loss = 2.1565e-03, PNorm = 172.4875, GNorm = 0.1256, lr_0 = 1.5880e-04
Loss = 1.5159e-03, PNorm = 172.4907, GNorm = 0.1175, lr_0 = 1.5870e-04
Loss = 1.4717e-03, PNorm = 172.4930, GNorm = 0.1734, lr_0 = 1.5859e-04
Loss = 2.4539e-03, PNorm = 172.4940, GNorm = 0.1547, lr_0 = 1.5848e-04
Loss = 1.1288e-03, PNorm = 172.4948, GNorm = 0.0381, lr_0 = 1.5837e-04
Loss = 2.6886e-03, PNorm = 172.4976, GNorm = 0.2288, lr_0 = 1.5826e-04
Loss = 1.2687e-03, PNorm = 172.5007, GNorm = 0.0755, lr_0 = 1.5815e-04
Loss = 2.0298e-03, PNorm = 172.5027, GNorm = 0.2452, lr_0 = 1.5804e-04
Loss = 1.8410e-03, PNorm = 172.5062, GNorm = 0.1765, lr_0 = 1.5794e-04
Loss = 1.2256e-03, PNorm = 172.5078, GNorm = 0.0814, lr_0 = 1.5783e-04
Loss = 1.1805e-03, PNorm = 172.5088, GNorm = 0.0677, lr_0 = 1.5772e-04
Loss = 2.1800e-03, PNorm = 172.5108, GNorm = 0.1230, lr_0 = 1.5761e-04
Loss = 2.1131e-03, PNorm = 172.5132, GNorm = 0.5132, lr_0 = 1.5750e-04
Loss = 2.3044e-03, PNorm = 172.5144, GNorm = 0.2201, lr_0 = 1.5740e-04
Loss = 1.4179e-03, PNorm = 172.5175, GNorm = 0.1431, lr_0 = 1.5729e-04
Loss = 2.0524e-03, PNorm = 172.5201, GNorm = 0.6817, lr_0 = 1.5718e-04
Loss = 1.8146e-03, PNorm = 172.5208, GNorm = 0.1237, lr_0 = 1.5707e-04
Loss = 2.9342e-03, PNorm = 172.5238, GNorm = 0.3426, lr_0 = 1.5697e-04
Loss = 1.5065e-03, PNorm = 172.5256, GNorm = 0.1253, lr_0 = 1.5686e-04
Loss = 2.1935e-03, PNorm = 172.5274, GNorm = 0.1903, lr_0 = 1.5675e-04
Loss = 2.8883e-03, PNorm = 172.5308, GNorm = 0.0634, lr_0 = 1.5664e-04
Loss = 1.7754e-03, PNorm = 172.5330, GNorm = 0.0429, lr_0 = 1.5654e-04
Loss = 1.6778e-03, PNorm = 172.5363, GNorm = 0.1632, lr_0 = 1.5643e-04
Loss = 1.1058e-03, PNorm = 172.5396, GNorm = 0.1308, lr_0 = 1.5632e-04
Loss = 1.1499e-03, PNorm = 172.5430, GNorm = 0.0563, lr_0 = 1.5621e-04
Loss = 1.5098e-03, PNorm = 172.5455, GNorm = 0.1711, lr_0 = 1.5611e-04
Loss = 2.1630e-03, PNorm = 172.5487, GNorm = 0.0564, lr_0 = 1.5600e-04
Loss = 1.3676e-03, PNorm = 172.5519, GNorm = 0.1482, lr_0 = 1.5589e-04
Loss = 2.3218e-03, PNorm = 172.5536, GNorm = 0.0284, lr_0 = 1.5579e-04
Loss = 1.8377e-03, PNorm = 172.5556, GNorm = 0.0367, lr_0 = 1.5568e-04
Loss = 1.3795e-03, PNorm = 172.5586, GNorm = 0.1123, lr_0 = 1.5557e-04
Loss = 4.1967e-03, PNorm = 172.5620, GNorm = 0.0879, lr_0 = 1.5547e-04
Loss = 3.3294e-03, PNorm = 172.5652, GNorm = 0.0416, lr_0 = 1.5536e-04
Loss = 1.3423e-03, PNorm = 172.5677, GNorm = 0.0901, lr_0 = 1.5525e-04
Loss = 2.6272e-03, PNorm = 172.5696, GNorm = 0.1170, lr_0 = 1.5515e-04
Loss = 3.6930e-03, PNorm = 172.5725, GNorm = 0.1779, lr_0 = 1.5504e-04
Loss = 1.2952e-03, PNorm = 172.5775, GNorm = 0.0478, lr_0 = 1.5493e-04
Loss = 2.0472e-03, PNorm = 172.5796, GNorm = 0.1433, lr_0 = 1.5483e-04
Loss = 3.7343e-03, PNorm = 172.5818, GNorm = 0.0587, lr_0 = 1.5472e-04
Loss = 2.5881e-03, PNorm = 172.5846, GNorm = 0.3771, lr_0 = 1.5462e-04
Loss = 1.2436e-03, PNorm = 172.5877, GNorm = 0.1210, lr_0 = 1.5451e-04
Loss = 2.2483e-03, PNorm = 172.5918, GNorm = 0.3430, lr_0 = 1.5440e-04
Loss = 1.2157e-03, PNorm = 172.5942, GNorm = 0.1069, lr_0 = 1.5430e-04
Loss = 1.8285e-03, PNorm = 172.5965, GNorm = 0.1323, lr_0 = 1.5419e-04
Loss = 1.2187e-03, PNorm = 172.5987, GNorm = 0.1648, lr_0 = 1.5409e-04
Loss = 2.1491e-03, PNorm = 172.6006, GNorm = 0.2071, lr_0 = 1.5398e-04
Loss = 1.3859e-03, PNorm = 172.6024, GNorm = 0.1099, lr_0 = 1.5388e-04
Loss = 1.6693e-03, PNorm = 172.6046, GNorm = 0.1194, lr_0 = 1.5377e-04
Loss = 1.2626e-03, PNorm = 172.6074, GNorm = 0.1106, lr_0 = 1.5367e-04
Loss = 1.4427e-03, PNorm = 172.6107, GNorm = 0.1182, lr_0 = 1.5356e-04
Loss = 2.1910e-03, PNorm = 172.6145, GNorm = 0.1276, lr_0 = 1.5346e-04
Loss = 1.6664e-03, PNorm = 172.6175, GNorm = 0.1320, lr_0 = 1.5335e-04
Loss = 1.6493e-03, PNorm = 172.6190, GNorm = 0.0469, lr_0 = 1.5325e-04
Loss = 2.3024e-03, PNorm = 172.6224, GNorm = 0.0720, lr_0 = 1.5314e-04
Loss = 1.4033e-03, PNorm = 172.6250, GNorm = 0.1550, lr_0 = 1.5304e-04
Loss = 1.8824e-03, PNorm = 172.6285, GNorm = 0.0945, lr_0 = 1.5293e-04
Loss = 2.7181e-03, PNorm = 172.6312, GNorm = 0.1690, lr_0 = 1.5283e-04
Loss = 1.0424e-03, PNorm = 172.6321, GNorm = 0.0417, lr_0 = 1.5272e-04
Loss = 2.1572e-03, PNorm = 172.6331, GNorm = 0.1012, lr_0 = 1.5262e-04
Loss = 1.5760e-03, PNorm = 172.6356, GNorm = 0.0783, lr_0 = 1.5251e-04
Loss = 1.0855e-03, PNorm = 172.6386, GNorm = 0.2530, lr_0 = 1.5241e-04
Loss = 1.7477e-03, PNorm = 172.6419, GNorm = 0.2790, lr_0 = 1.5230e-04
Loss = 3.9231e-03, PNorm = 172.6446, GNorm = 0.2736, lr_0 = 1.5220e-04
Loss = 2.3498e-03, PNorm = 172.6493, GNorm = 0.0966, lr_0 = 1.5209e-04
Loss = 2.1908e-03, PNorm = 172.6514, GNorm = 0.2337, lr_0 = 1.5199e-04
Loss = 1.6440e-03, PNorm = 172.6539, GNorm = 0.2759, lr_0 = 1.5189e-04
Loss = 1.7787e-03, PNorm = 172.6552, GNorm = 0.0702, lr_0 = 1.5178e-04
Loss = 1.3143e-03, PNorm = 172.6589, GNorm = 0.2366, lr_0 = 1.5168e-04
Loss = 1.3603e-03, PNorm = 172.6597, GNorm = 0.0469, lr_0 = 1.5157e-04
Loss = 1.4436e-03, PNorm = 172.6610, GNorm = 0.1709, lr_0 = 1.5147e-04
Loss = 1.6536e-03, PNorm = 172.6628, GNorm = 0.0887, lr_0 = 1.5137e-04
Loss = 1.4350e-03, PNorm = 172.6666, GNorm = 0.0487, lr_0 = 1.5126e-04
Loss = 1.8072e-03, PNorm = 172.6706, GNorm = 0.1590, lr_0 = 1.5116e-04
Loss = 2.3732e-03, PNorm = 172.6717, GNorm = 0.1121, lr_0 = 1.5106e-04
Loss = 2.9177e-03, PNorm = 172.6743, GNorm = 0.1380, lr_0 = 1.5095e-04
Loss = 1.3216e-03, PNorm = 172.6775, GNorm = 0.1390, lr_0 = 1.5085e-04
Validation mae = 0.278108
Epoch 25
Loss = 1.9437e-03, PNorm = 172.6810, GNorm = 0.0949, lr_0 = 1.5075e-04
Loss = 1.5665e-03, PNorm = 172.6814, GNorm = 0.1233, lr_0 = 1.5064e-04
Loss = 1.1875e-03, PNorm = 172.6831, GNorm = 0.1342, lr_0 = 1.5054e-04
Loss = 1.8365e-03, PNorm = 172.6843, GNorm = 0.0811, lr_0 = 1.5044e-04
Loss = 1.5102e-03, PNorm = 172.6863, GNorm = 0.1093, lr_0 = 1.5033e-04
Loss = 1.0275e-03, PNorm = 172.6876, GNorm = 0.0653, lr_0 = 1.5023e-04
Loss = 1.8799e-03, PNorm = 172.6895, GNorm = 0.1817, lr_0 = 1.5013e-04
Loss = 1.6735e-03, PNorm = 172.6917, GNorm = 0.1060, lr_0 = 1.5002e-04
Loss = 1.5074e-03, PNorm = 172.6934, GNorm = 0.1116, lr_0 = 1.4992e-04
Loss = 1.2299e-03, PNorm = 172.6949, GNorm = 0.0568, lr_0 = 1.4982e-04
Loss = 2.5157e-03, PNorm = 172.6967, GNorm = 0.0892, lr_0 = 1.4972e-04
Loss = 1.0967e-03, PNorm = 172.6991, GNorm = 0.1689, lr_0 = 1.4961e-04
Loss = 2.4125e-03, PNorm = 172.7018, GNorm = 0.1016, lr_0 = 1.4951e-04
Loss = 1.4919e-03, PNorm = 172.7049, GNorm = 0.0689, lr_0 = 1.4941e-04
Loss = 1.1364e-03, PNorm = 172.7068, GNorm = 0.0832, lr_0 = 1.4931e-04
Loss = 2.1366e-03, PNorm = 172.7088, GNorm = 0.1236, lr_0 = 1.4920e-04
Loss = 9.6570e-04, PNorm = 172.7108, GNorm = 0.1725, lr_0 = 1.4910e-04
Loss = 1.3567e-03, PNorm = 172.7123, GNorm = 0.0810, lr_0 = 1.4900e-04
Loss = 1.4177e-03, PNorm = 172.7152, GNorm = 0.0933, lr_0 = 1.4890e-04
Loss = 1.6959e-03, PNorm = 172.7173, GNorm = 0.0341, lr_0 = 1.4880e-04
Loss = 1.1302e-03, PNorm = 172.7189, GNorm = 0.0326, lr_0 = 1.4869e-04
Loss = 1.0054e-03, PNorm = 172.7213, GNorm = 0.1178, lr_0 = 1.4859e-04
Loss = 3.7606e-03, PNorm = 172.7245, GNorm = 0.1555, lr_0 = 1.4849e-04
Loss = 2.1669e-03, PNorm = 172.7272, GNorm = 0.0797, lr_0 = 1.4839e-04
Loss = 1.1825e-03, PNorm = 172.7317, GNorm = 0.1406, lr_0 = 1.4829e-04
Loss = 1.7081e-03, PNorm = 172.7339, GNorm = 0.1051, lr_0 = 1.4818e-04
Loss = 1.0204e-03, PNorm = 172.7359, GNorm = 0.0882, lr_0 = 1.4808e-04
Loss = 2.1454e-03, PNorm = 172.7377, GNorm = 0.0873, lr_0 = 1.4798e-04
Loss = 1.0713e-03, PNorm = 172.7393, GNorm = 0.0697, lr_0 = 1.4788e-04
Loss = 1.9924e-03, PNorm = 172.7390, GNorm = 0.0693, lr_0 = 1.4778e-04
Loss = 2.2271e-03, PNorm = 172.7436, GNorm = 0.1122, lr_0 = 1.4768e-04
Loss = 1.5270e-03, PNorm = 172.7468, GNorm = 0.2534, lr_0 = 1.4758e-04
Loss = 2.3143e-03, PNorm = 172.7492, GNorm = 0.1172, lr_0 = 1.4748e-04
Loss = 1.1084e-03, PNorm = 172.7505, GNorm = 0.0936, lr_0 = 1.4737e-04
Loss = 8.5836e-04, PNorm = 172.7513, GNorm = 0.1300, lr_0 = 1.4727e-04
Loss = 2.0478e-03, PNorm = 172.7522, GNorm = 0.0809, lr_0 = 1.4717e-04
Loss = 9.2625e-04, PNorm = 172.7540, GNorm = 0.0566, lr_0 = 1.4707e-04
Loss = 1.7486e-03, PNorm = 172.7557, GNorm = 0.0642, lr_0 = 1.4697e-04
Loss = 3.5511e-03, PNorm = 172.7579, GNorm = 0.1031, lr_0 = 1.4687e-04
Loss = 1.5021e-03, PNorm = 172.7610, GNorm = 0.1048, lr_0 = 1.4677e-04
Loss = 1.2550e-03, PNorm = 172.7641, GNorm = 0.1315, lr_0 = 1.4667e-04
Loss = 1.2894e-03, PNorm = 172.7659, GNorm = 0.1503, lr_0 = 1.4657e-04
Loss = 1.1882e-03, PNorm = 172.7682, GNorm = 0.0806, lr_0 = 1.4647e-04
Loss = 9.9712e-04, PNorm = 172.7708, GNorm = 0.0534, lr_0 = 1.4637e-04
Loss = 1.7976e-03, PNorm = 172.7731, GNorm = 0.1082, lr_0 = 1.4627e-04
Loss = 2.6319e-03, PNorm = 172.7760, GNorm = 0.1484, lr_0 = 1.4617e-04
Loss = 1.9797e-03, PNorm = 172.7788, GNorm = 0.0409, lr_0 = 1.4607e-04
Loss = 9.3565e-04, PNorm = 172.7806, GNorm = 0.0462, lr_0 = 1.4597e-04
Loss = 7.1414e-04, PNorm = 172.7821, GNorm = 0.0763, lr_0 = 1.4587e-04
Loss = 1.8245e-03, PNorm = 172.7843, GNorm = 0.0600, lr_0 = 1.4577e-04
Loss = 1.4899e-03, PNorm = 172.7885, GNorm = 0.2335, lr_0 = 1.4567e-04
Loss = 2.9778e-03, PNorm = 172.7911, GNorm = 0.0993, lr_0 = 1.4557e-04
Loss = 2.6143e-03, PNorm = 172.7942, GNorm = 0.0996, lr_0 = 1.4547e-04
Loss = 1.4415e-03, PNorm = 172.7956, GNorm = 0.1060, lr_0 = 1.4537e-04
Loss = 1.6183e-03, PNorm = 172.7982, GNorm = 0.2375, lr_0 = 1.4527e-04
Loss = 1.6395e-03, PNorm = 172.8008, GNorm = 0.1049, lr_0 = 1.4517e-04
Loss = 2.8667e-03, PNorm = 172.8027, GNorm = 0.1124, lr_0 = 1.4507e-04
Loss = 1.0880e-03, PNorm = 172.8040, GNorm = 0.0975, lr_0 = 1.4497e-04
Loss = 1.2617e-03, PNorm = 172.8062, GNorm = 0.0878, lr_0 = 1.4487e-04
Loss = 1.0341e-03, PNorm = 172.8099, GNorm = 0.1345, lr_0 = 1.4477e-04
Loss = 1.4160e-03, PNorm = 172.8112, GNorm = 0.1312, lr_0 = 1.4467e-04
Loss = 1.0707e-03, PNorm = 172.8131, GNorm = 0.1827, lr_0 = 1.4457e-04
Loss = 1.5875e-03, PNorm = 172.8152, GNorm = 0.2500, lr_0 = 1.4447e-04
Loss = 1.2000e-03, PNorm = 172.8171, GNorm = 0.0852, lr_0 = 1.4438e-04
Loss = 1.2391e-03, PNorm = 172.8196, GNorm = 0.1081, lr_0 = 1.4428e-04
Loss = 1.0744e-03, PNorm = 172.8228, GNorm = 0.0910, lr_0 = 1.4418e-04
Loss = 1.0383e-03, PNorm = 172.8249, GNorm = 0.0610, lr_0 = 1.4408e-04
Loss = 8.2083e-04, PNorm = 172.8265, GNorm = 0.1008, lr_0 = 1.4398e-04
Loss = 1.0738e-03, PNorm = 172.8271, GNorm = 0.1125, lr_0 = 1.4388e-04
Loss = 9.6397e-04, PNorm = 172.8288, GNorm = 0.0496, lr_0 = 1.4378e-04
Loss = 2.9207e-03, PNorm = 172.8308, GNorm = 0.1359, lr_0 = 1.4368e-04
Loss = 8.6824e-04, PNorm = 172.8321, GNorm = 0.1536, lr_0 = 1.4359e-04
Loss = 1.3410e-03, PNorm = 172.8334, GNorm = 0.0813, lr_0 = 1.4349e-04
Loss = 1.0757e-03, PNorm = 172.8349, GNorm = 0.0406, lr_0 = 1.4339e-04
Loss = 1.2530e-03, PNorm = 172.8375, GNorm = 0.0969, lr_0 = 1.4329e-04
Loss = 1.8026e-03, PNorm = 172.8396, GNorm = 0.0292, lr_0 = 1.4319e-04
Loss = 1.6620e-03, PNorm = 172.8419, GNorm = 0.1557, lr_0 = 1.4310e-04
Loss = 1.2572e-03, PNorm = 172.8439, GNorm = 0.0971, lr_0 = 1.4300e-04
Loss = 2.1984e-03, PNorm = 172.8470, GNorm = 0.0345, lr_0 = 1.4290e-04
Loss = 1.1516e-03, PNorm = 172.8494, GNorm = 0.0331, lr_0 = 1.4280e-04
Loss = 1.3244e-03, PNorm = 172.8520, GNorm = 0.0804, lr_0 = 1.4270e-04
Loss = 2.2011e-03, PNorm = 172.8539, GNorm = 0.0518, lr_0 = 1.4261e-04
Loss = 2.0402e-03, PNorm = 172.8547, GNorm = 0.1531, lr_0 = 1.4251e-04
Loss = 1.7675e-03, PNorm = 172.8555, GNorm = 0.0538, lr_0 = 1.4241e-04
Loss = 1.0451e-03, PNorm = 172.8584, GNorm = 0.0870, lr_0 = 1.4231e-04
Loss = 2.7705e-03, PNorm = 172.8608, GNorm = 0.1231, lr_0 = 1.4222e-04
Loss = 1.1832e-03, PNorm = 172.8642, GNorm = 0.0471, lr_0 = 1.4212e-04
Loss = 1.3456e-03, PNorm = 172.8667, GNorm = 0.0908, lr_0 = 1.4202e-04
Loss = 2.4047e-03, PNorm = 172.8684, GNorm = 0.0761, lr_0 = 1.4192e-04
Loss = 2.1814e-03, PNorm = 172.8695, GNorm = 0.0540, lr_0 = 1.4183e-04
Loss = 3.4741e-03, PNorm = 172.8692, GNorm = 0.1675, lr_0 = 1.4173e-04
Loss = 2.6511e-03, PNorm = 172.8706, GNorm = 0.1032, lr_0 = 1.4163e-04
Loss = 1.3606e-03, PNorm = 172.8729, GNorm = 0.0291, lr_0 = 1.4153e-04
Loss = 2.1027e-03, PNorm = 172.8761, GNorm = 0.0631, lr_0 = 1.4144e-04
Loss = 1.4852e-03, PNorm = 172.8790, GNorm = 0.1688, lr_0 = 1.4134e-04
Loss = 9.6440e-04, PNorm = 172.8807, GNorm = 0.1341, lr_0 = 1.4124e-04
Loss = 1.9611e-03, PNorm = 172.8826, GNorm = 0.0979, lr_0 = 1.4115e-04
Loss = 3.0262e-03, PNorm = 172.8856, GNorm = 0.0837, lr_0 = 1.4105e-04
Loss = 2.6730e-03, PNorm = 172.8882, GNorm = 0.1931, lr_0 = 1.4095e-04
Loss = 1.4515e-03, PNorm = 172.8907, GNorm = 0.2551, lr_0 = 1.4086e-04
Loss = 1.6567e-03, PNorm = 172.8951, GNorm = 0.1506, lr_0 = 1.4076e-04
Loss = 1.1439e-03, PNorm = 172.8984, GNorm = 0.1689, lr_0 = 1.4066e-04
Loss = 9.9375e-04, PNorm = 172.9014, GNorm = 0.1571, lr_0 = 1.4057e-04
Loss = 1.3543e-03, PNorm = 172.9061, GNorm = 0.2089, lr_0 = 1.4047e-04
Loss = 1.5765e-03, PNorm = 172.9106, GNorm = 0.0723, lr_0 = 1.4038e-04
Loss = 2.2757e-03, PNorm = 172.9134, GNorm = 0.2069, lr_0 = 1.4028e-04
Loss = 1.6748e-03, PNorm = 172.9164, GNorm = 0.0747, lr_0 = 1.4018e-04
Loss = 1.8295e-03, PNorm = 172.9195, GNorm = 0.1527, lr_0 = 1.4009e-04
Loss = 1.7228e-03, PNorm = 172.9210, GNorm = 0.1299, lr_0 = 1.3999e-04
Loss = 1.0945e-03, PNorm = 172.9225, GNorm = 0.0482, lr_0 = 1.3990e-04
Loss = 1.0569e-03, PNorm = 172.9236, GNorm = 0.1169, lr_0 = 1.3980e-04
Loss = 1.5113e-03, PNorm = 172.9257, GNorm = 0.0596, lr_0 = 1.3970e-04
Loss = 2.0511e-03, PNorm = 172.9290, GNorm = 0.1723, lr_0 = 1.3961e-04
Loss = 3.5462e-03, PNorm = 172.9311, GNorm = 0.0549, lr_0 = 1.3951e-04
Loss = 1.1589e-03, PNorm = 172.9333, GNorm = 0.2790, lr_0 = 1.3942e-04
Loss = 4.3450e-03, PNorm = 172.9340, GNorm = 0.1440, lr_0 = 1.3932e-04
Loss = 1.0575e-03, PNorm = 172.9345, GNorm = 0.0380, lr_0 = 1.3923e-04
Loss = 2.6292e-03, PNorm = 172.9369, GNorm = 0.0766, lr_0 = 1.3913e-04
Loss = 2.7349e-03, PNorm = 172.9389, GNorm = 0.0415, lr_0 = 1.3904e-04
Loss = 1.9063e-03, PNorm = 172.9414, GNorm = 0.0564, lr_0 = 1.3894e-04
Validation mae = 0.278028
Epoch 26
Loss = 2.9678e-03, PNorm = 172.9448, GNorm = 0.0598, lr_0 = 1.3884e-04
Loss = 1.4665e-03, PNorm = 172.9474, GNorm = 0.0690, lr_0 = 1.3875e-04
Loss = 1.2796e-03, PNorm = 172.9481, GNorm = 0.0673, lr_0 = 1.3865e-04
Loss = 8.9982e-04, PNorm = 172.9482, GNorm = 0.1471, lr_0 = 1.3856e-04
Loss = 1.9574e-03, PNorm = 172.9469, GNorm = 0.0398, lr_0 = 1.3846e-04
Loss = 1.1210e-03, PNorm = 172.9475, GNorm = 0.0753, lr_0 = 1.3837e-04
Loss = 1.6495e-03, PNorm = 172.9500, GNorm = 0.0859, lr_0 = 1.3828e-04
Loss = 1.7029e-03, PNorm = 172.9522, GNorm = 0.0956, lr_0 = 1.3818e-04
Loss = 4.4670e-03, PNorm = 172.9546, GNorm = 0.2475, lr_0 = 1.3809e-04
Loss = 2.0726e-03, PNorm = 172.9572, GNorm = 0.0699, lr_0 = 1.3799e-04
Loss = 2.1278e-03, PNorm = 172.9590, GNorm = 0.0641, lr_0 = 1.3790e-04
Loss = 1.1321e-03, PNorm = 172.9588, GNorm = 0.0487, lr_0 = 1.3780e-04
Loss = 1.9831e-03, PNorm = 172.9596, GNorm = 0.0377, lr_0 = 1.3771e-04
Loss = 1.5001e-03, PNorm = 172.9618, GNorm = 0.0389, lr_0 = 1.3761e-04
Loss = 9.2099e-04, PNorm = 172.9649, GNorm = 0.0438, lr_0 = 1.3752e-04
Loss = 8.7051e-04, PNorm = 172.9677, GNorm = 0.0360, lr_0 = 1.3742e-04
Loss = 9.2000e-04, PNorm = 172.9700, GNorm = 0.0713, lr_0 = 1.3733e-04
Loss = 1.0410e-03, PNorm = 172.9730, GNorm = 0.0642, lr_0 = 1.3724e-04
Loss = 1.4375e-03, PNorm = 172.9761, GNorm = 0.0846, lr_0 = 1.3714e-04
Loss = 1.7651e-03, PNorm = 172.9787, GNorm = 0.1389, lr_0 = 1.3705e-04
Loss = 1.1353e-03, PNorm = 172.9800, GNorm = 0.0674, lr_0 = 1.3695e-04
Loss = 1.0109e-03, PNorm = 172.9803, GNorm = 0.0635, lr_0 = 1.3686e-04
Loss = 9.1897e-04, PNorm = 172.9813, GNorm = 0.0791, lr_0 = 1.3677e-04
Loss = 1.2863e-03, PNorm = 172.9833, GNorm = 0.0998, lr_0 = 1.3667e-04
Loss = 9.1364e-04, PNorm = 172.9854, GNorm = 0.0901, lr_0 = 1.3658e-04
Loss = 1.2474e-03, PNorm = 172.9868, GNorm = 0.1982, lr_0 = 1.3649e-04
Loss = 1.0686e-03, PNorm = 172.9879, GNorm = 0.0467, lr_0 = 1.3639e-04
Loss = 3.7655e-03, PNorm = 172.9866, GNorm = 0.0929, lr_0 = 1.3630e-04
Loss = 1.0080e-03, PNorm = 172.9870, GNorm = 0.1272, lr_0 = 1.3621e-04
Loss = 1.3588e-03, PNorm = 172.9893, GNorm = 0.1222, lr_0 = 1.3611e-04
Loss = 1.1360e-03, PNorm = 172.9915, GNorm = 0.1112, lr_0 = 1.3602e-04
Loss = 1.0030e-03, PNorm = 172.9939, GNorm = 0.0519, lr_0 = 1.3593e-04
Loss = 1.1896e-03, PNorm = 172.9972, GNorm = 0.0574, lr_0 = 1.3583e-04
Loss = 8.6037e-04, PNorm = 172.9989, GNorm = 0.0498, lr_0 = 1.3574e-04
Loss = 1.1115e-03, PNorm = 172.9998, GNorm = 0.0899, lr_0 = 1.3565e-04
Loss = 7.9486e-04, PNorm = 173.0014, GNorm = 0.0528, lr_0 = 1.3555e-04
Loss = 1.6716e-03, PNorm = 173.0025, GNorm = 0.0487, lr_0 = 1.3546e-04
Loss = 1.5208e-03, PNorm = 173.0043, GNorm = 0.0518, lr_0 = 1.3537e-04
Loss = 6.7175e-04, PNorm = 173.0065, GNorm = 0.0361, lr_0 = 1.3528e-04
Loss = 1.3459e-03, PNorm = 173.0078, GNorm = 0.0549, lr_0 = 1.3518e-04
Loss = 1.0135e-03, PNorm = 173.0099, GNorm = 0.1233, lr_0 = 1.3509e-04
Loss = 1.7265e-03, PNorm = 173.0125, GNorm = 0.0615, lr_0 = 1.3500e-04
Loss = 9.8275e-04, PNorm = 173.0135, GNorm = 0.1315, lr_0 = 1.3491e-04
Loss = 1.1363e-03, PNorm = 173.0154, GNorm = 0.1029, lr_0 = 1.3481e-04
Loss = 1.2190e-03, PNorm = 173.0156, GNorm = 0.1113, lr_0 = 1.3472e-04
Loss = 1.1412e-03, PNorm = 173.0168, GNorm = 0.1960, lr_0 = 1.3463e-04
Loss = 1.9687e-03, PNorm = 173.0177, GNorm = 0.0308, lr_0 = 1.3454e-04
Loss = 1.4846e-03, PNorm = 173.0189, GNorm = 0.0980, lr_0 = 1.3444e-04
Loss = 8.5795e-04, PNorm = 173.0204, GNorm = 0.1773, lr_0 = 1.3435e-04
Loss = 1.0517e-03, PNorm = 173.0213, GNorm = 0.0805, lr_0 = 1.3426e-04
Loss = 2.9431e-03, PNorm = 173.0228, GNorm = 0.3878, lr_0 = 1.3417e-04
Loss = 1.0324e-03, PNorm = 173.0249, GNorm = 0.0901, lr_0 = 1.3408e-04
Loss = 1.0531e-03, PNorm = 173.0255, GNorm = 0.0383, lr_0 = 1.3398e-04
Loss = 1.0825e-03, PNorm = 173.0273, GNorm = 0.0716, lr_0 = 1.3389e-04
Loss = 1.3570e-03, PNorm = 173.0285, GNorm = 0.1626, lr_0 = 1.3380e-04
Loss = 1.0253e-03, PNorm = 173.0303, GNorm = 0.0922, lr_0 = 1.3371e-04
Loss = 1.9432e-03, PNorm = 173.0313, GNorm = 0.1262, lr_0 = 1.3362e-04
Loss = 7.7461e-04, PNorm = 173.0340, GNorm = 0.0427, lr_0 = 1.3353e-04
Loss = 1.1325e-03, PNorm = 173.0360, GNorm = 0.0272, lr_0 = 1.3343e-04
Loss = 8.8624e-04, PNorm = 173.0380, GNorm = 0.1393, lr_0 = 1.3334e-04
Loss = 1.3238e-03, PNorm = 173.0402, GNorm = 0.1726, lr_0 = 1.3325e-04
Loss = 1.0226e-03, PNorm = 173.0419, GNorm = 0.0704, lr_0 = 1.3316e-04
Loss = 2.5176e-03, PNorm = 173.0427, GNorm = 0.0847, lr_0 = 1.3307e-04
Loss = 1.1340e-03, PNorm = 173.0433, GNorm = 0.1348, lr_0 = 1.3298e-04
Loss = 1.7028e-03, PNorm = 173.0450, GNorm = 0.0786, lr_0 = 1.3289e-04
Loss = 3.5879e-03, PNorm = 173.0476, GNorm = 0.3827, lr_0 = 1.3280e-04
Loss = 1.3625e-03, PNorm = 173.0512, GNorm = 0.1646, lr_0 = 1.3270e-04
Loss = 1.5452e-03, PNorm = 173.0541, GNorm = 0.0662, lr_0 = 1.3261e-04
Loss = 4.6700e-03, PNorm = 173.0566, GNorm = 0.1152, lr_0 = 1.3252e-04
Loss = 1.8366e-03, PNorm = 173.0591, GNorm = 0.0512, lr_0 = 1.3243e-04
Loss = 1.3550e-03, PNorm = 173.0611, GNorm = 0.0465, lr_0 = 1.3234e-04
Loss = 1.0763e-03, PNorm = 173.0615, GNorm = 0.1291, lr_0 = 1.3225e-04
Loss = 2.7776e-03, PNorm = 173.0642, GNorm = 0.1014, lr_0 = 1.3216e-04
Loss = 2.0168e-03, PNorm = 173.0661, GNorm = 0.0605, lr_0 = 1.3207e-04
Loss = 1.3002e-03, PNorm = 173.0686, GNorm = 0.1795, lr_0 = 1.3198e-04
Loss = 7.7409e-04, PNorm = 173.0709, GNorm = 0.2184, lr_0 = 1.3189e-04
Loss = 3.9192e-03, PNorm = 173.0751, GNorm = 0.1732, lr_0 = 1.3180e-04
Loss = 1.5516e-03, PNorm = 173.0770, GNorm = 0.1737, lr_0 = 1.3171e-04
Loss = 1.8662e-03, PNorm = 173.0802, GNorm = 0.0881, lr_0 = 1.3162e-04
Loss = 2.6178e-03, PNorm = 173.0815, GNorm = 0.0555, lr_0 = 1.3153e-04
Loss = 1.8795e-03, PNorm = 173.0831, GNorm = 0.1124, lr_0 = 1.3144e-04
Loss = 1.0461e-03, PNorm = 173.0856, GNorm = 0.0556, lr_0 = 1.3135e-04
Loss = 1.2284e-03, PNorm = 173.0889, GNorm = 0.1082, lr_0 = 1.3126e-04
Loss = 8.5136e-04, PNorm = 173.0909, GNorm = 0.0662, lr_0 = 1.3117e-04
Loss = 9.0233e-04, PNorm = 173.0920, GNorm = 0.0660, lr_0 = 1.3108e-04
Loss = 2.2274e-03, PNorm = 173.0945, GNorm = 0.1452, lr_0 = 1.3099e-04
Loss = 5.1231e-03, PNorm = 173.0969, GNorm = 0.2385, lr_0 = 1.3090e-04
Loss = 2.1144e-03, PNorm = 173.0975, GNorm = 0.1687, lr_0 = 1.3081e-04
Loss = 1.5528e-03, PNorm = 173.0992, GNorm = 0.0575, lr_0 = 1.3072e-04
Loss = 1.2188e-03, PNorm = 173.1018, GNorm = 0.1889, lr_0 = 1.3063e-04
Loss = 8.3533e-04, PNorm = 173.1050, GNorm = 0.0684, lr_0 = 1.3054e-04
Loss = 1.6986e-03, PNorm = 173.1055, GNorm = 0.1005, lr_0 = 1.3045e-04
Loss = 9.1075e-04, PNorm = 173.1065, GNorm = 0.1560, lr_0 = 1.3036e-04
Loss = 1.2172e-03, PNorm = 173.1090, GNorm = 0.0565, lr_0 = 1.3027e-04
Loss = 1.0581e-03, PNorm = 173.1107, GNorm = 0.0839, lr_0 = 1.3018e-04
Loss = 8.8175e-04, PNorm = 173.1120, GNorm = 0.0375, lr_0 = 1.3009e-04
Loss = 8.7332e-04, PNorm = 173.1135, GNorm = 0.1983, lr_0 = 1.3000e-04
Loss = 1.0241e-03, PNorm = 173.1153, GNorm = 0.1813, lr_0 = 1.2992e-04
Loss = 2.6883e-03, PNorm = 173.1156, GNorm = 0.0530, lr_0 = 1.2983e-04
Loss = 9.9603e-04, PNorm = 173.1170, GNorm = 0.0731, lr_0 = 1.2974e-04
Loss = 1.3710e-03, PNorm = 173.1196, GNorm = 0.0738, lr_0 = 1.2965e-04
Loss = 2.0676e-03, PNorm = 173.1210, GNorm = 0.3515, lr_0 = 1.2956e-04
Loss = 8.8728e-04, PNorm = 173.1238, GNorm = 0.1233, lr_0 = 1.2947e-04
Loss = 1.7912e-03, PNorm = 173.1271, GNorm = 0.1115, lr_0 = 1.2938e-04
Loss = 1.4757e-03, PNorm = 173.1292, GNorm = 0.0672, lr_0 = 1.2929e-04
Loss = 1.2219e-03, PNorm = 173.1314, GNorm = 0.0569, lr_0 = 1.2921e-04
Loss = 7.6936e-04, PNorm = 173.1345, GNorm = 0.1098, lr_0 = 1.2912e-04
Loss = 2.0448e-03, PNorm = 173.1359, GNorm = 0.1055, lr_0 = 1.2903e-04
Loss = 1.4821e-03, PNorm = 173.1373, GNorm = 0.0643, lr_0 = 1.2894e-04
Loss = 9.9793e-04, PNorm = 173.1384, GNorm = 0.1303, lr_0 = 1.2885e-04
Loss = 1.8861e-03, PNorm = 173.1403, GNorm = 0.0683, lr_0 = 1.2876e-04
Loss = 1.8259e-03, PNorm = 173.1439, GNorm = 0.0927, lr_0 = 1.2867e-04
Loss = 3.3082e-03, PNorm = 173.1471, GNorm = 0.1338, lr_0 = 1.2859e-04
Loss = 1.3059e-03, PNorm = 173.1494, GNorm = 0.0386, lr_0 = 1.2850e-04
Loss = 1.2927e-03, PNorm = 173.1511, GNorm = 0.1014, lr_0 = 1.2841e-04
Loss = 8.4863e-04, PNorm = 173.1527, GNorm = 0.1116, lr_0 = 1.2832e-04
Loss = 1.8198e-03, PNorm = 173.1550, GNorm = 0.4154, lr_0 = 1.2823e-04
Loss = 2.4735e-03, PNorm = 173.1572, GNorm = 0.1348, lr_0 = 1.2815e-04
Loss = 1.1222e-03, PNorm = 173.1600, GNorm = 0.0999, lr_0 = 1.2806e-04
Loss = 9.5625e-04, PNorm = 173.1628, GNorm = 0.0689, lr_0 = 1.2797e-04
Validation mae = 0.277970
Epoch 27
Loss = 2.2707e-03, PNorm = 173.1634, GNorm = 0.2211, lr_0 = 1.2788e-04
Loss = 8.0519e-04, PNorm = 173.1642, GNorm = 0.2326, lr_0 = 1.2780e-04
Loss = 9.1234e-04, PNorm = 173.1641, GNorm = 0.0823, lr_0 = 1.2771e-04
Loss = 1.0212e-03, PNorm = 173.1661, GNorm = 0.0396, lr_0 = 1.2762e-04
Loss = 7.9601e-04, PNorm = 173.1676, GNorm = 0.1762, lr_0 = 1.2753e-04
Loss = 7.9577e-04, PNorm = 173.1702, GNorm = 0.0670, lr_0 = 1.2745e-04
Loss = 8.7333e-04, PNorm = 173.1706, GNorm = 0.0730, lr_0 = 1.2736e-04
Loss = 9.3072e-04, PNorm = 173.1712, GNorm = 0.0751, lr_0 = 1.2727e-04
Loss = 1.3916e-03, PNorm = 173.1715, GNorm = 0.1423, lr_0 = 1.2718e-04
Loss = 1.2646e-03, PNorm = 173.1728, GNorm = 0.1150, lr_0 = 1.2710e-04
Loss = 1.8965e-03, PNorm = 173.1751, GNorm = 0.0834, lr_0 = 1.2701e-04
Loss = 1.3288e-03, PNorm = 173.1771, GNorm = 0.0781, lr_0 = 1.2692e-04
Loss = 7.5466e-04, PNorm = 173.1787, GNorm = 0.0706, lr_0 = 1.2684e-04
Loss = 1.3331e-03, PNorm = 173.1795, GNorm = 0.0779, lr_0 = 1.2675e-04
Loss = 1.0460e-03, PNorm = 173.1810, GNorm = 0.0738, lr_0 = 1.2666e-04
Loss = 7.4883e-04, PNorm = 173.1833, GNorm = 0.0849, lr_0 = 1.2658e-04
Loss = 2.2586e-03, PNorm = 173.1848, GNorm = 0.0328, lr_0 = 1.2649e-04
Loss = 6.9363e-04, PNorm = 173.1874, GNorm = 0.1188, lr_0 = 1.2640e-04
Loss = 9.9499e-04, PNorm = 173.1896, GNorm = 0.2544, lr_0 = 1.2632e-04
Loss = 8.8532e-04, PNorm = 173.1907, GNorm = 0.0688, lr_0 = 1.2623e-04
Loss = 2.6636e-03, PNorm = 173.1926, GNorm = 0.0879, lr_0 = 1.2614e-04
Loss = 9.4437e-04, PNorm = 173.1929, GNorm = 0.0508, lr_0 = 1.2606e-04
Loss = 1.2962e-03, PNorm = 173.1938, GNorm = 0.0298, lr_0 = 1.2597e-04
Loss = 1.6657e-03, PNorm = 173.1956, GNorm = 0.0620, lr_0 = 1.2588e-04
Loss = 6.6263e-04, PNorm = 173.1969, GNorm = 0.1020, lr_0 = 1.2580e-04
Loss = 7.4174e-04, PNorm = 173.1979, GNorm = 0.1608, lr_0 = 1.2571e-04
Loss = 9.0122e-04, PNorm = 173.1978, GNorm = 0.0975, lr_0 = 1.2563e-04
Loss = 6.1384e-04, PNorm = 173.1994, GNorm = 0.0464, lr_0 = 1.2554e-04
Loss = 1.5133e-03, PNorm = 173.2002, GNorm = 0.0405, lr_0 = 1.2545e-04
Loss = 8.1498e-04, PNorm = 173.2021, GNorm = 0.0389, lr_0 = 1.2537e-04
Loss = 1.2549e-03, PNorm = 173.2043, GNorm = 0.0620, lr_0 = 1.2528e-04
Loss = 1.4249e-03, PNorm = 173.2072, GNorm = 0.0217, lr_0 = 1.2520e-04
Loss = 1.5543e-03, PNorm = 173.2107, GNorm = 0.0624, lr_0 = 1.2511e-04
Loss = 8.0621e-04, PNorm = 173.2119, GNorm = 0.1069, lr_0 = 1.2502e-04
Loss = 8.8258e-04, PNorm = 173.2130, GNorm = 0.0435, lr_0 = 1.2494e-04
Loss = 9.7353e-04, PNorm = 173.2130, GNorm = 0.0641, lr_0 = 1.2485e-04
Loss = 9.7951e-04, PNorm = 173.2132, GNorm = 0.0919, lr_0 = 1.2477e-04
Loss = 2.6159e-03, PNorm = 173.2131, GNorm = 0.0767, lr_0 = 1.2468e-04
Loss = 1.5991e-03, PNorm = 173.2144, GNorm = 0.1383, lr_0 = 1.2460e-04
Loss = 2.5431e-03, PNorm = 173.2157, GNorm = 0.0755, lr_0 = 1.2451e-04
Loss = 9.2925e-04, PNorm = 173.2182, GNorm = 0.0802, lr_0 = 1.2443e-04
Loss = 9.7038e-04, PNorm = 173.2220, GNorm = 0.0976, lr_0 = 1.2434e-04
Loss = 1.2073e-03, PNorm = 173.2233, GNorm = 0.1270, lr_0 = 1.2426e-04
Loss = 7.4466e-04, PNorm = 173.2245, GNorm = 0.0572, lr_0 = 1.2417e-04
Loss = 7.3921e-04, PNorm = 173.2252, GNorm = 0.0483, lr_0 = 1.2409e-04
Loss = 1.8966e-03, PNorm = 173.2268, GNorm = 0.2554, lr_0 = 1.2400e-04
Loss = 9.3161e-04, PNorm = 173.2281, GNorm = 0.1338, lr_0 = 1.2392e-04
Loss = 1.6057e-03, PNorm = 173.2308, GNorm = 0.1631, lr_0 = 1.2383e-04
Loss = 2.9737e-03, PNorm = 173.2323, GNorm = 0.1020, lr_0 = 1.2375e-04
Loss = 8.0204e-04, PNorm = 173.2338, GNorm = 0.1772, lr_0 = 1.2366e-04
Loss = 3.7911e-03, PNorm = 173.2357, GNorm = 0.0777, lr_0 = 1.2358e-04
Loss = 1.3263e-03, PNorm = 173.2370, GNorm = 0.1677, lr_0 = 1.2349e-04
Loss = 2.5211e-03, PNorm = 173.2366, GNorm = 0.0968, lr_0 = 1.2341e-04
Loss = 1.5671e-03, PNorm = 173.2368, GNorm = 0.0868, lr_0 = 1.2332e-04
Loss = 1.5526e-03, PNorm = 173.2384, GNorm = 0.1279, lr_0 = 1.2324e-04
Loss = 1.2888e-03, PNorm = 173.2390, GNorm = 0.0707, lr_0 = 1.2315e-04
Loss = 1.0168e-03, PNorm = 173.2403, GNorm = 0.1110, lr_0 = 1.2307e-04
Loss = 1.9533e-03, PNorm = 173.2424, GNorm = 0.2537, lr_0 = 1.2298e-04
Loss = 6.5200e-04, PNorm = 173.2443, GNorm = 0.1521, lr_0 = 1.2290e-04
Loss = 9.1879e-04, PNorm = 173.2462, GNorm = 0.1436, lr_0 = 1.2282e-04
Loss = 1.1197e-03, PNorm = 173.2481, GNorm = 0.0741, lr_0 = 1.2273e-04
Loss = 9.0878e-04, PNorm = 173.2511, GNorm = 0.1486, lr_0 = 1.2265e-04
Loss = 7.1811e-04, PNorm = 173.2536, GNorm = 0.0335, lr_0 = 1.2256e-04
Loss = 4.4620e-03, PNorm = 173.2563, GNorm = 0.0873, lr_0 = 1.2248e-04
Loss = 3.4235e-03, PNorm = 173.2573, GNorm = 0.0527, lr_0 = 1.2240e-04
Loss = 1.2571e-03, PNorm = 173.2595, GNorm = 0.0968, lr_0 = 1.2231e-04
Loss = 1.0618e-03, PNorm = 173.2618, GNorm = 0.0948, lr_0 = 1.2223e-04
Loss = 1.0514e-03, PNorm = 173.2626, GNorm = 0.1463, lr_0 = 1.2214e-04
Loss = 1.6152e-03, PNorm = 173.2636, GNorm = 0.1990, lr_0 = 1.2206e-04
Loss = 1.6405e-03, PNorm = 173.2651, GNorm = 0.0485, lr_0 = 1.2198e-04
Loss = 1.0085e-03, PNorm = 173.2669, GNorm = 0.0532, lr_0 = 1.2189e-04
Loss = 1.2948e-03, PNorm = 173.2688, GNorm = 0.0977, lr_0 = 1.2181e-04
Loss = 1.5623e-03, PNorm = 173.2719, GNorm = 0.1414, lr_0 = 1.2173e-04
Loss = 1.1948e-03, PNorm = 173.2733, GNorm = 0.0703, lr_0 = 1.2164e-04
Loss = 1.5274e-03, PNorm = 173.2745, GNorm = 0.0442, lr_0 = 1.2156e-04
Loss = 8.5183e-04, PNorm = 173.2750, GNorm = 0.0303, lr_0 = 1.2148e-04
Loss = 9.4686e-04, PNorm = 173.2762, GNorm = 0.2232, lr_0 = 1.2139e-04
Loss = 1.2715e-03, PNorm = 173.2779, GNorm = 0.0293, lr_0 = 1.2131e-04
Loss = 2.8714e-03, PNorm = 173.2794, GNorm = 0.0424, lr_0 = 1.2123e-04
Loss = 7.4280e-04, PNorm = 173.2812, GNorm = 0.0726, lr_0 = 1.2114e-04
Loss = 1.3002e-03, PNorm = 173.2825, GNorm = 0.0591, lr_0 = 1.2106e-04
Loss = 1.2421e-03, PNorm = 173.2849, GNorm = 0.1922, lr_0 = 1.2098e-04
Loss = 7.1551e-04, PNorm = 173.2867, GNorm = 0.0541, lr_0 = 1.2090e-04
Loss = 2.0547e-03, PNorm = 173.2882, GNorm = 0.0418, lr_0 = 1.2081e-04
Loss = 2.7307e-03, PNorm = 173.2903, GNorm = 0.0576, lr_0 = 1.2073e-04
Loss = 1.9970e-03, PNorm = 173.2929, GNorm = 0.1018, lr_0 = 1.2065e-04
Loss = 8.8237e-04, PNorm = 173.2947, GNorm = 0.0840, lr_0 = 1.2056e-04
Loss = 9.6693e-04, PNorm = 173.2957, GNorm = 0.1194, lr_0 = 1.2048e-04
Loss = 3.1115e-03, PNorm = 173.2965, GNorm = 0.1150, lr_0 = 1.2040e-04
Loss = 1.2402e-03, PNorm = 173.2989, GNorm = 0.0773, lr_0 = 1.2032e-04
Loss = 2.1935e-03, PNorm = 173.3010, GNorm = 0.1854, lr_0 = 1.2023e-04
Loss = 9.4020e-04, PNorm = 173.3037, GNorm = 0.1274, lr_0 = 1.2015e-04
Loss = 8.1597e-04, PNorm = 173.3053, GNorm = 0.0367, lr_0 = 1.2007e-04
Loss = 1.0926e-03, PNorm = 173.3072, GNorm = 0.2192, lr_0 = 1.1999e-04
Loss = 1.6575e-03, PNorm = 173.3094, GNorm = 0.0701, lr_0 = 1.1991e-04
Loss = 6.9854e-04, PNorm = 173.3110, GNorm = 0.0419, lr_0 = 1.1982e-04
Loss = 8.5195e-04, PNorm = 173.3117, GNorm = 0.1360, lr_0 = 1.1974e-04
Loss = 9.6744e-04, PNorm = 173.3135, GNorm = 0.0680, lr_0 = 1.1966e-04
Loss = 2.0975e-03, PNorm = 173.3151, GNorm = 0.1355, lr_0 = 1.1958e-04
Loss = 1.4053e-03, PNorm = 173.3178, GNorm = 0.0958, lr_0 = 1.1950e-04
Loss = 1.1642e-03, PNorm = 173.3186, GNorm = 0.1532, lr_0 = 1.1941e-04
Loss = 1.4472e-03, PNorm = 173.3202, GNorm = 0.0896, lr_0 = 1.1933e-04
Loss = 1.3883e-03, PNorm = 173.3202, GNorm = 0.1029, lr_0 = 1.1925e-04
Loss = 1.0332e-03, PNorm = 173.3219, GNorm = 0.0647, lr_0 = 1.1917e-04
Loss = 3.0112e-03, PNorm = 173.3225, GNorm = 0.0639, lr_0 = 1.1909e-04
Loss = 9.5035e-04, PNorm = 173.3238, GNorm = 0.0463, lr_0 = 1.1901e-04
Loss = 1.4046e-03, PNorm = 173.3257, GNorm = 0.0654, lr_0 = 1.1892e-04
Loss = 1.9843e-03, PNorm = 173.3281, GNorm = 0.1205, lr_0 = 1.1884e-04
Loss = 8.9727e-04, PNorm = 173.3297, GNorm = 0.0727, lr_0 = 1.1876e-04
Loss = 1.3605e-03, PNorm = 173.3311, GNorm = 0.0693, lr_0 = 1.1868e-04
Loss = 2.3480e-03, PNorm = 173.3328, GNorm = 0.0900, lr_0 = 1.1860e-04
Loss = 1.9297e-03, PNorm = 173.3329, GNorm = 0.0734, lr_0 = 1.1852e-04
Loss = 7.9549e-04, PNorm = 173.3344, GNorm = 0.0844, lr_0 = 1.1844e-04
Loss = 5.2663e-03, PNorm = 173.3366, GNorm = 0.4674, lr_0 = 1.1835e-04
Loss = 1.1536e-03, PNorm = 173.3385, GNorm = 0.0580, lr_0 = 1.1827e-04
Loss = 6.4150e-04, PNorm = 173.3402, GNorm = 0.0930, lr_0 = 1.1819e-04
Loss = 1.8127e-03, PNorm = 173.3421, GNorm = 0.0513, lr_0 = 1.1811e-04
Loss = 1.1494e-03, PNorm = 173.3436, GNorm = 0.0331, lr_0 = 1.1803e-04
Loss = 1.5501e-03, PNorm = 173.3444, GNorm = 0.1233, lr_0 = 1.1795e-04
Loss = 9.5364e-04, PNorm = 173.3457, GNorm = 0.0663, lr_0 = 1.1787e-04
Validation mae = 0.277836
Epoch 28
Loss = 1.0378e-03, PNorm = 173.3469, GNorm = 0.1202, lr_0 = 1.1779e-04
Loss = 2.5366e-03, PNorm = 173.3497, GNorm = 0.1066, lr_0 = 1.1771e-04
Loss = 8.8678e-04, PNorm = 173.3504, GNorm = 0.1274, lr_0 = 1.1763e-04
Loss = 1.1308e-03, PNorm = 173.3513, GNorm = 0.1109, lr_0 = 1.1755e-04
Loss = 1.5688e-03, PNorm = 173.3534, GNorm = 0.1112, lr_0 = 1.1747e-04
Loss = 8.3592e-04, PNorm = 173.3547, GNorm = 0.1853, lr_0 = 1.1739e-04
Loss = 1.3502e-03, PNorm = 173.3562, GNorm = 0.0854, lr_0 = 1.1730e-04
Loss = 7.3936e-04, PNorm = 173.3567, GNorm = 0.0913, lr_0 = 1.1722e-04
Loss = 2.7928e-03, PNorm = 173.3580, GNorm = 0.2736, lr_0 = 1.1714e-04
Loss = 1.2395e-03, PNorm = 173.3595, GNorm = 0.0351, lr_0 = 1.1706e-04
Loss = 1.2650e-03, PNorm = 173.3607, GNorm = 0.0916, lr_0 = 1.1698e-04
Loss = 1.4279e-03, PNorm = 173.3619, GNorm = 0.0991, lr_0 = 1.1690e-04
Loss = 1.8197e-03, PNorm = 173.3636, GNorm = 0.0506, lr_0 = 1.1682e-04
Loss = 8.8905e-04, PNorm = 173.3647, GNorm = 0.1650, lr_0 = 1.1674e-04
Loss = 1.3606e-03, PNorm = 173.3661, GNorm = 0.0754, lr_0 = 1.1666e-04
Loss = 7.4955e-04, PNorm = 173.3671, GNorm = 0.0778, lr_0 = 1.1658e-04
Loss = 1.3141e-03, PNorm = 173.3668, GNorm = 0.0476, lr_0 = 1.1650e-04
Loss = 1.8721e-03, PNorm = 173.3673, GNorm = 0.0444, lr_0 = 1.1642e-04
Loss = 6.8542e-04, PNorm = 173.3688, GNorm = 0.0866, lr_0 = 1.1634e-04
Loss = 1.2470e-03, PNorm = 173.3693, GNorm = 0.1038, lr_0 = 1.1626e-04
Loss = 1.6336e-03, PNorm = 173.3715, GNorm = 0.0709, lr_0 = 1.1618e-04
Loss = 7.0776e-04, PNorm = 173.3723, GNorm = 0.0643, lr_0 = 1.1611e-04
Loss = 5.6391e-04, PNorm = 173.3723, GNorm = 0.0604, lr_0 = 1.1603e-04
Loss = 6.3596e-04, PNorm = 173.3729, GNorm = 0.1264, lr_0 = 1.1595e-04
Loss = 1.1902e-03, PNorm = 173.3734, GNorm = 0.0839, lr_0 = 1.1587e-04
Loss = 6.7790e-04, PNorm = 173.3751, GNorm = 0.0590, lr_0 = 1.1579e-04
Loss = 7.7292e-04, PNorm = 173.3768, GNorm = 0.2123, lr_0 = 1.1571e-04
Loss = 9.4805e-04, PNorm = 173.3803, GNorm = 0.0757, lr_0 = 1.1563e-04
Loss = 5.3679e-04, PNorm = 173.3827, GNorm = 0.0394, lr_0 = 1.1555e-04
Loss = 1.3766e-03, PNorm = 173.3855, GNorm = 0.0576, lr_0 = 1.1547e-04
Loss = 3.2132e-03, PNorm = 173.3874, GNorm = 0.3713, lr_0 = 1.1539e-04
Loss = 6.4265e-04, PNorm = 173.3875, GNorm = 0.0503, lr_0 = 1.1531e-04
Loss = 1.6757e-03, PNorm = 173.3876, GNorm = 0.1365, lr_0 = 1.1523e-04
Loss = 8.3376e-04, PNorm = 173.3869, GNorm = 0.0491, lr_0 = 1.1515e-04
Loss = 7.8049e-04, PNorm = 173.3885, GNorm = 0.1999, lr_0 = 1.1508e-04
Loss = 1.4593e-03, PNorm = 173.3888, GNorm = 0.0897, lr_0 = 1.1500e-04
Loss = 8.1491e-04, PNorm = 173.3885, GNorm = 0.0460, lr_0 = 1.1492e-04
Loss = 9.3399e-04, PNorm = 173.3888, GNorm = 0.0988, lr_0 = 1.1484e-04
Loss = 1.2103e-03, PNorm = 173.3901, GNorm = 0.0390, lr_0 = 1.1476e-04
Loss = 8.1667e-04, PNorm = 173.3911, GNorm = 0.1084, lr_0 = 1.1468e-04
Loss = 8.0817e-04, PNorm = 173.3932, GNorm = 0.1399, lr_0 = 1.1460e-04
Loss = 9.9134e-04, PNorm = 173.3939, GNorm = 0.0436, lr_0 = 1.1452e-04
Loss = 5.1118e-04, PNorm = 173.3946, GNorm = 0.1177, lr_0 = 1.1445e-04
Loss = 3.4167e-03, PNorm = 173.3947, GNorm = 0.0567, lr_0 = 1.1437e-04
Loss = 8.7961e-04, PNorm = 173.3945, GNorm = 0.0857, lr_0 = 1.1429e-04
Loss = 1.9373e-03, PNorm = 173.3952, GNorm = 0.0566, lr_0 = 1.1421e-04
Loss = 1.1199e-03, PNorm = 173.3955, GNorm = 0.0898, lr_0 = 1.1413e-04
Loss = 2.2741e-03, PNorm = 173.3965, GNorm = 0.1881, lr_0 = 1.1405e-04
Loss = 9.3678e-04, PNorm = 173.3990, GNorm = 0.1979, lr_0 = 1.1398e-04
Loss = 1.0897e-03, PNorm = 173.4010, GNorm = 0.0354, lr_0 = 1.1390e-04
Loss = 8.6386e-04, PNorm = 173.4019, GNorm = 0.1614, lr_0 = 1.1382e-04
Loss = 1.3556e-03, PNorm = 173.4029, GNorm = 0.0612, lr_0 = 1.1374e-04
Loss = 7.0091e-04, PNorm = 173.4033, GNorm = 0.0483, lr_0 = 1.1366e-04
Loss = 1.6006e-03, PNorm = 173.4053, GNorm = 0.3190, lr_0 = 1.1359e-04
Loss = 2.2119e-03, PNorm = 173.4077, GNorm = 0.1329, lr_0 = 1.1351e-04
Loss = 2.0733e-03, PNorm = 173.4091, GNorm = 0.1049, lr_0 = 1.1343e-04
Loss = 8.1428e-04, PNorm = 173.4093, GNorm = 0.0831, lr_0 = 1.1335e-04
Loss = 5.8923e-04, PNorm = 173.4101, GNorm = 0.0528, lr_0 = 1.1328e-04
Loss = 5.5185e-04, PNorm = 173.4119, GNorm = 0.0236, lr_0 = 1.1320e-04
Loss = 6.0820e-04, PNorm = 173.4135, GNorm = 0.1520, lr_0 = 1.1312e-04
Loss = 1.6411e-03, PNorm = 173.4151, GNorm = 0.0663, lr_0 = 1.1304e-04
Loss = 6.5051e-04, PNorm = 173.4174, GNorm = 0.0591, lr_0 = 1.1297e-04
Loss = 1.2137e-03, PNorm = 173.4194, GNorm = 0.1258, lr_0 = 1.1289e-04
Loss = 7.6467e-04, PNorm = 173.4211, GNorm = 0.0822, lr_0 = 1.1281e-04
Loss = 7.0164e-04, PNorm = 173.4229, GNorm = 0.1353, lr_0 = 1.1273e-04
Loss = 8.7111e-04, PNorm = 173.4231, GNorm = 0.0613, lr_0 = 1.1266e-04
Loss = 1.6640e-03, PNorm = 173.4249, GNorm = 0.0872, lr_0 = 1.1258e-04
Loss = 6.4859e-04, PNorm = 173.4261, GNorm = 0.0822, lr_0 = 1.1250e-04
Loss = 1.5535e-03, PNorm = 173.4274, GNorm = 0.0665, lr_0 = 1.1243e-04
Loss = 5.2271e-04, PNorm = 173.4278, GNorm = 0.0753, lr_0 = 1.1235e-04
Loss = 4.8982e-03, PNorm = 173.4282, GNorm = 0.1208, lr_0 = 1.1227e-04
Loss = 2.9284e-03, PNorm = 173.4302, GNorm = 0.2114, lr_0 = 1.1219e-04
Loss = 6.4351e-04, PNorm = 173.4324, GNorm = 0.0583, lr_0 = 1.1212e-04
Loss = 7.3317e-04, PNorm = 173.4343, GNorm = 0.0696, lr_0 = 1.1204e-04
Loss = 2.8171e-03, PNorm = 173.4356, GNorm = 0.2254, lr_0 = 1.1196e-04
Loss = 6.7734e-04, PNorm = 173.4359, GNorm = 0.0961, lr_0 = 1.1189e-04
Loss = 6.9456e-04, PNorm = 173.4375, GNorm = 0.2843, lr_0 = 1.1181e-04
Loss = 7.2951e-04, PNorm = 173.4400, GNorm = 0.0403, lr_0 = 1.1173e-04
Loss = 1.1084e-03, PNorm = 173.4434, GNorm = 0.2403, lr_0 = 1.1166e-04
Loss = 1.5930e-03, PNorm = 173.4442, GNorm = 0.0358, lr_0 = 1.1158e-04
Loss = 1.6497e-03, PNorm = 173.4456, GNorm = 0.0990, lr_0 = 1.1150e-04
Loss = 1.5336e-03, PNorm = 173.4465, GNorm = 0.1207, lr_0 = 1.1143e-04
Loss = 9.3428e-04, PNorm = 173.4473, GNorm = 0.0436, lr_0 = 1.1135e-04
Loss = 1.6164e-03, PNorm = 173.4475, GNorm = 0.3110, lr_0 = 1.1128e-04
Loss = 9.7440e-04, PNorm = 173.4472, GNorm = 0.1556, lr_0 = 1.1120e-04
Loss = 1.6219e-03, PNorm = 173.4483, GNorm = 0.0943, lr_0 = 1.1112e-04
Loss = 1.6923e-03, PNorm = 173.4514, GNorm = 0.0629, lr_0 = 1.1105e-04
Loss = 8.8858e-04, PNorm = 173.4536, GNorm = 0.1616, lr_0 = 1.1097e-04
Loss = 1.1843e-03, PNorm = 173.4565, GNorm = 0.0638, lr_0 = 1.1089e-04
Loss = 5.3877e-04, PNorm = 173.4589, GNorm = 0.0915, lr_0 = 1.1082e-04
Loss = 1.1603e-03, PNorm = 173.4617, GNorm = 0.0884, lr_0 = 1.1074e-04
Loss = 1.8919e-03, PNorm = 173.4619, GNorm = 0.0887, lr_0 = 1.1067e-04
Loss = 6.5905e-04, PNorm = 173.4626, GNorm = 0.0876, lr_0 = 1.1059e-04
Loss = 1.4018e-03, PNorm = 173.4635, GNorm = 0.1032, lr_0 = 1.1052e-04
Loss = 2.1482e-03, PNorm = 173.4647, GNorm = 0.0956, lr_0 = 1.1044e-04
Loss = 1.1630e-03, PNorm = 173.4667, GNorm = 0.1393, lr_0 = 1.1036e-04
Loss = 1.0667e-03, PNorm = 173.4678, GNorm = 0.1320, lr_0 = 1.1029e-04
Loss = 1.2001e-03, PNorm = 173.4708, GNorm = 0.0223, lr_0 = 1.1021e-04
Loss = 6.5613e-04, PNorm = 173.4730, GNorm = 0.0850, lr_0 = 1.1014e-04
Loss = 1.8327e-03, PNorm = 173.4739, GNorm = 0.0431, lr_0 = 1.1006e-04
Loss = 1.4752e-03, PNorm = 173.4767, GNorm = 0.0863, lr_0 = 1.0999e-04
Loss = 1.1007e-03, PNorm = 173.4786, GNorm = 0.0892, lr_0 = 1.0991e-04
Loss = 8.0488e-04, PNorm = 173.4806, GNorm = 0.1100, lr_0 = 1.0984e-04
Loss = 1.9998e-03, PNorm = 173.4820, GNorm = 0.0738, lr_0 = 1.0976e-04
Loss = 4.7963e-04, PNorm = 173.4836, GNorm = 0.0281, lr_0 = 1.0969e-04
Loss = 2.3911e-03, PNorm = 173.4839, GNorm = 0.0448, lr_0 = 1.0961e-04
Loss = 2.3047e-03, PNorm = 173.4840, GNorm = 0.0733, lr_0 = 1.0954e-04
Loss = 1.2311e-03, PNorm = 173.4856, GNorm = 0.0651, lr_0 = 1.0946e-04
Loss = 1.7194e-03, PNorm = 173.4865, GNorm = 0.1150, lr_0 = 1.0939e-04
Loss = 1.4700e-03, PNorm = 173.4887, GNorm = 0.1408, lr_0 = 1.0931e-04
Loss = 1.1619e-03, PNorm = 173.4911, GNorm = 0.0483, lr_0 = 1.0924e-04
Loss = 1.6403e-03, PNorm = 173.4946, GNorm = 0.1303, lr_0 = 1.0916e-04
Loss = 1.8171e-03, PNorm = 173.4954, GNorm = 0.2027, lr_0 = 1.0909e-04
Loss = 3.2565e-03, PNorm = 173.4972, GNorm = 0.0938, lr_0 = 1.0901e-04
Loss = 1.7551e-03, PNorm = 173.4986, GNorm = 0.0277, lr_0 = 1.0894e-04
Loss = 8.6634e-04, PNorm = 173.5014, GNorm = 0.0310, lr_0 = 1.0886e-04
Loss = 8.5796e-04, PNorm = 173.5038, GNorm = 0.0730, lr_0 = 1.0879e-04
Loss = 5.8910e-04, PNorm = 173.5056, GNorm = 0.0748, lr_0 = 1.0871e-04
Loss = 1.1847e-03, PNorm = 173.5068, GNorm = 0.1816, lr_0 = 1.0864e-04
Loss = 6.5939e-04, PNorm = 173.5103, GNorm = 0.0713, lr_0 = 1.0856e-04
Validation mae = 0.277545
Epoch 29
Loss = 1.7342e-03, PNorm = 173.5132, GNorm = 0.0795, lr_0 = 1.0849e-04
Loss = 7.4432e-04, PNorm = 173.5142, GNorm = 0.0398, lr_0 = 1.0841e-04
Loss = 1.2706e-03, PNorm = 173.5159, GNorm = 0.0381, lr_0 = 1.0834e-04
Loss = 5.0716e-04, PNorm = 173.5165, GNorm = 0.0303, lr_0 = 1.0827e-04
Loss = 5.3277e-04, PNorm = 173.5167, GNorm = 0.0520, lr_0 = 1.0819e-04
Loss = 1.6488e-03, PNorm = 173.5168, GNorm = 0.0812, lr_0 = 1.0812e-04
Loss = 1.7416e-03, PNorm = 173.5175, GNorm = 0.0718, lr_0 = 1.0804e-04
Loss = 2.8058e-03, PNorm = 173.5176, GNorm = 0.3793, lr_0 = 1.0797e-04
Loss = 7.6448e-04, PNorm = 173.5184, GNorm = 0.1753, lr_0 = 1.0790e-04
Loss = 4.8915e-04, PNorm = 173.5184, GNorm = 0.0289, lr_0 = 1.0782e-04
Loss = 8.6589e-04, PNorm = 173.5187, GNorm = 0.0897, lr_0 = 1.0775e-04
Loss = 6.9808e-04, PNorm = 173.5201, GNorm = 0.0488, lr_0 = 1.0767e-04
Loss = 2.1443e-03, PNorm = 173.5210, GNorm = 0.0622, lr_0 = 1.0760e-04
Loss = 8.0684e-04, PNorm = 173.5225, GNorm = 0.0657, lr_0 = 1.0753e-04
Loss = 8.0508e-04, PNorm = 173.5237, GNorm = 0.0721, lr_0 = 1.0745e-04
Loss = 6.2454e-04, PNorm = 173.5254, GNorm = 0.1223, lr_0 = 1.0738e-04
Loss = 5.2910e-04, PNorm = 173.5269, GNorm = 0.0786, lr_0 = 1.0731e-04
Loss = 8.1514e-04, PNorm = 173.5279, GNorm = 0.3275, lr_0 = 1.0723e-04
Loss = 2.1445e-03, PNorm = 173.5292, GNorm = 0.1350, lr_0 = 1.0716e-04
Loss = 6.3298e-04, PNorm = 173.5307, GNorm = 0.1832, lr_0 = 1.0709e-04
Loss = 2.6966e-03, PNorm = 173.5319, GNorm = 0.1917, lr_0 = 1.0701e-04
Loss = 1.9604e-03, PNorm = 173.5329, GNorm = 0.1613, lr_0 = 1.0694e-04
Loss = 2.3288e-03, PNorm = 173.5336, GNorm = 0.1119, lr_0 = 1.0687e-04
Loss = 1.3816e-03, PNorm = 173.5351, GNorm = 0.0775, lr_0 = 1.0679e-04
Loss = 8.1014e-04, PNorm = 173.5361, GNorm = 0.0471, lr_0 = 1.0672e-04
Loss = 1.4721e-03, PNorm = 173.5357, GNorm = 0.0631, lr_0 = 1.0665e-04
Loss = 1.6419e-03, PNorm = 173.5374, GNorm = 0.0385, lr_0 = 1.0657e-04
Loss = 8.3959e-04, PNorm = 173.5372, GNorm = 0.0572, lr_0 = 1.0650e-04
Loss = 5.7500e-04, PNorm = 173.5382, GNorm = 0.0610, lr_0 = 1.0643e-04
Loss = 2.2051e-03, PNorm = 173.5391, GNorm = 0.1532, lr_0 = 1.0635e-04
Loss = 1.1740e-03, PNorm = 173.5401, GNorm = 0.1485, lr_0 = 1.0628e-04
Loss = 1.1670e-03, PNorm = 173.5411, GNorm = 0.0817, lr_0 = 1.0621e-04
Loss = 5.8047e-04, PNorm = 173.5426, GNorm = 0.0638, lr_0 = 1.0614e-04
Loss = 1.1836e-03, PNorm = 173.5429, GNorm = 0.0219, lr_0 = 1.0606e-04
Loss = 6.4597e-04, PNorm = 173.5439, GNorm = 0.0399, lr_0 = 1.0599e-04
Loss = 5.9800e-04, PNorm = 173.5447, GNorm = 0.1355, lr_0 = 1.0592e-04
Loss = 2.1184e-03, PNorm = 173.5468, GNorm = 0.0273, lr_0 = 1.0585e-04
Loss = 1.9856e-03, PNorm = 173.5487, GNorm = 0.1793, lr_0 = 1.0577e-04
Loss = 6.4193e-04, PNorm = 173.5496, GNorm = 0.1570, lr_0 = 1.0570e-04
Loss = 1.0245e-03, PNorm = 173.5507, GNorm = 0.1315, lr_0 = 1.0563e-04
Loss = 4.6792e-04, PNorm = 173.5517, GNorm = 0.0853, lr_0 = 1.0556e-04
Loss = 9.3205e-04, PNorm = 173.5524, GNorm = 0.0407, lr_0 = 1.0548e-04
Loss = 1.4250e-03, PNorm = 173.5541, GNorm = 0.0948, lr_0 = 1.0541e-04
Loss = 4.5722e-04, PNorm = 173.5547, GNorm = 0.0519, lr_0 = 1.0534e-04
Loss = 1.0134e-03, PNorm = 173.5559, GNorm = 0.0328, lr_0 = 1.0527e-04
Loss = 2.1463e-03, PNorm = 173.5579, GNorm = 0.0438, lr_0 = 1.0519e-04
Loss = 1.1888e-03, PNorm = 173.5594, GNorm = 0.0578, lr_0 = 1.0512e-04
Loss = 1.5047e-03, PNorm = 173.5593, GNorm = 0.0580, lr_0 = 1.0505e-04
Loss = 4.6804e-04, PNorm = 173.5596, GNorm = 0.0535, lr_0 = 1.0498e-04
Loss = 9.7551e-04, PNorm = 173.5596, GNorm = 0.0926, lr_0 = 1.0491e-04
Loss = 1.5367e-03, PNorm = 173.5605, GNorm = 0.0651, lr_0 = 1.0483e-04
Loss = 4.6824e-04, PNorm = 173.5620, GNorm = 0.1212, lr_0 = 1.0476e-04
Loss = 5.8585e-04, PNorm = 173.5625, GNorm = 0.0783, lr_0 = 1.0469e-04
Loss = 6.5627e-04, PNorm = 173.5642, GNorm = 0.0491, lr_0 = 1.0462e-04
Loss = 7.2592e-04, PNorm = 173.5658, GNorm = 0.2188, lr_0 = 1.0455e-04
Loss = 5.4893e-04, PNorm = 173.5672, GNorm = 0.0654, lr_0 = 1.0448e-04
Loss = 5.5362e-04, PNorm = 173.5679, GNorm = 0.0819, lr_0 = 1.0440e-04
Loss = 7.2895e-04, PNorm = 173.5681, GNorm = 0.0686, lr_0 = 1.0433e-04
Loss = 6.5225e-04, PNorm = 173.5697, GNorm = 0.0507, lr_0 = 1.0426e-04
Loss = 1.4132e-03, PNorm = 173.5697, GNorm = 0.1093, lr_0 = 1.0419e-04
Loss = 9.6039e-04, PNorm = 173.5708, GNorm = 0.0319, lr_0 = 1.0412e-04
Loss = 1.5546e-03, PNorm = 173.5727, GNorm = 0.1503, lr_0 = 1.0405e-04
Loss = 6.9281e-04, PNorm = 173.5735, GNorm = 0.0734, lr_0 = 1.0398e-04
Loss = 9.7340e-04, PNorm = 173.5746, GNorm = 0.1083, lr_0 = 1.0391e-04
Loss = 8.3716e-04, PNorm = 173.5766, GNorm = 0.1034, lr_0 = 1.0383e-04
Loss = 1.7572e-03, PNorm = 173.5785, GNorm = 0.0643, lr_0 = 1.0376e-04
Loss = 7.5227e-04, PNorm = 173.5786, GNorm = 0.0555, lr_0 = 1.0369e-04
Loss = 8.2687e-04, PNorm = 173.5792, GNorm = 0.0715, lr_0 = 1.0362e-04
Loss = 5.4016e-04, PNorm = 173.5813, GNorm = 0.0335, lr_0 = 1.0355e-04
Loss = 1.5736e-03, PNorm = 173.5834, GNorm = 0.0358, lr_0 = 1.0348e-04
Loss = 1.1431e-03, PNorm = 173.5840, GNorm = 0.1301, lr_0 = 1.0341e-04
Loss = 6.3701e-04, PNorm = 173.5838, GNorm = 0.0949, lr_0 = 1.0334e-04
Loss = 6.8348e-04, PNorm = 173.5840, GNorm = 0.0945, lr_0 = 1.0327e-04
Loss = 1.1349e-03, PNorm = 173.5860, GNorm = 0.0478, lr_0 = 1.0320e-04
Loss = 2.8283e-03, PNorm = 173.5874, GNorm = 0.1577, lr_0 = 1.0312e-04
Loss = 5.8082e-04, PNorm = 173.5888, GNorm = 0.0427, lr_0 = 1.0305e-04
Loss = 1.4592e-03, PNorm = 173.5898, GNorm = 0.0288, lr_0 = 1.0298e-04
Loss = 1.0393e-03, PNorm = 173.5915, GNorm = 0.1766, lr_0 = 1.0291e-04
Loss = 5.0539e-04, PNorm = 173.5926, GNorm = 0.0387, lr_0 = 1.0284e-04
Loss = 1.2696e-03, PNorm = 173.5939, GNorm = 0.0966, lr_0 = 1.0277e-04
Loss = 4.1154e-03, PNorm = 173.5960, GNorm = 0.0656, lr_0 = 1.0270e-04
Loss = 2.0094e-03, PNorm = 173.5977, GNorm = 0.0409, lr_0 = 1.0263e-04
Loss = 7.3314e-04, PNorm = 173.5997, GNorm = 0.0544, lr_0 = 1.0256e-04
Loss = 2.5418e-03, PNorm = 173.6006, GNorm = 0.1028, lr_0 = 1.0249e-04
Loss = 1.2823e-03, PNorm = 173.6015, GNorm = 0.1680, lr_0 = 1.0242e-04
Loss = 1.1308e-03, PNorm = 173.6020, GNorm = 0.0282, lr_0 = 1.0235e-04
Loss = 9.8723e-04, PNorm = 173.6042, GNorm = 0.1403, lr_0 = 1.0228e-04
Loss = 7.3983e-04, PNorm = 173.6053, GNorm = 0.0848, lr_0 = 1.0221e-04
Loss = 8.9520e-04, PNorm = 173.6066, GNorm = 0.0598, lr_0 = 1.0214e-04
Loss = 1.7126e-03, PNorm = 173.6074, GNorm = 0.0380, lr_0 = 1.0207e-04
Loss = 1.4982e-03, PNorm = 173.6076, GNorm = 0.0555, lr_0 = 1.0200e-04
Loss = 5.0687e-04, PNorm = 173.6093, GNorm = 0.0519, lr_0 = 1.0193e-04
Loss = 1.0450e-03, PNorm = 173.6095, GNorm = 0.1186, lr_0 = 1.0186e-04
Loss = 2.9439e-03, PNorm = 173.6097, GNorm = 0.0780, lr_0 = 1.0179e-04
Loss = 1.0210e-03, PNorm = 173.6107, GNorm = 0.0748, lr_0 = 1.0172e-04
Loss = 1.0249e-03, PNorm = 173.6138, GNorm = 0.1208, lr_0 = 1.0165e-04
Loss = 1.3125e-03, PNorm = 173.6159, GNorm = 0.0617, lr_0 = 1.0158e-04
Loss = 6.6490e-04, PNorm = 173.6176, GNorm = 0.1622, lr_0 = 1.0151e-04
Loss = 1.2968e-03, PNorm = 173.6195, GNorm = 0.0572, lr_0 = 1.0144e-04
Loss = 8.8713e-04, PNorm = 173.6202, GNorm = 0.0972, lr_0 = 1.0137e-04
Loss = 1.4681e-03, PNorm = 173.6196, GNorm = 0.0346, lr_0 = 1.0130e-04
Loss = 2.0409e-03, PNorm = 173.6202, GNorm = 0.0533, lr_0 = 1.0123e-04
Loss = 1.6759e-03, PNorm = 173.6220, GNorm = 0.0994, lr_0 = 1.0116e-04
Loss = 1.3046e-03, PNorm = 173.6223, GNorm = 0.0887, lr_0 = 1.0110e-04
Loss = 1.4589e-03, PNorm = 173.6245, GNorm = 0.3795, lr_0 = 1.0103e-04
Loss = 1.2377e-03, PNorm = 173.6243, GNorm = 0.1276, lr_0 = 1.0096e-04
Loss = 5.3707e-04, PNorm = 173.6268, GNorm = 0.1177, lr_0 = 1.0089e-04
Loss = 6.7400e-04, PNorm = 173.6277, GNorm = 0.0667, lr_0 = 1.0082e-04
Loss = 5.0279e-04, PNorm = 173.6284, GNorm = 0.0436, lr_0 = 1.0075e-04
Loss = 7.1672e-04, PNorm = 173.6294, GNorm = 0.1098, lr_0 = 1.0068e-04
Loss = 5.1648e-04, PNorm = 173.6306, GNorm = 0.0975, lr_0 = 1.0061e-04
Loss = 2.3613e-03, PNorm = 173.6309, GNorm = 0.0526, lr_0 = 1.0054e-04
Loss = 6.0976e-04, PNorm = 173.6327, GNorm = 0.0574, lr_0 = 1.0047e-04
Loss = 1.3558e-03, PNorm = 173.6346, GNorm = 0.1029, lr_0 = 1.0041e-04
Loss = 2.6649e-03, PNorm = 173.6366, GNorm = 0.4136, lr_0 = 1.0034e-04
Loss = 2.2008e-03, PNorm = 173.6381, GNorm = 0.1361, lr_0 = 1.0027e-04
Loss = 9.3143e-04, PNorm = 173.6393, GNorm = 0.1163, lr_0 = 1.0020e-04
Loss = 1.2146e-03, PNorm = 173.6402, GNorm = 0.0522, lr_0 = 1.0013e-04
Loss = 2.0669e-03, PNorm = 173.6414, GNorm = 0.1448, lr_0 = 1.0006e-04
Loss = 5.9463e-04, PNorm = 173.6426, GNorm = 0.1011, lr_0 = 1.0000e-04
Validation mae = 0.277702
Model 0 best validation mae = 0.277545 on epoch 28
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.273545
Ensemble test mae = 0.273545
Fold 3
Splitting data with seed 3
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=2200, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=2200, out_features=2200, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=2200, out_features=1, bias=True)
  )
)
Number of parameters = 9,352,201
Moving model to cuda
Epoch 0
Loss = 8.7160e-01, PNorm = 65.7655, GNorm = 1.9626, lr_0 = 1.0413e-04
Loss = 6.6676e-01, PNorm = 65.7786, GNorm = 2.4943, lr_0 = 1.0788e-04
Loss = 6.2758e-01, PNorm = 65.7927, GNorm = 2.1824, lr_0 = 1.1163e-04
Loss = 4.9811e-01, PNorm = 65.8049, GNorm = 2.3671, lr_0 = 1.1537e-04
Loss = 5.4515e-01, PNorm = 65.8147, GNorm = 3.3629, lr_0 = 1.1913e-04
Loss = 4.9094e-01, PNorm = 65.8244, GNorm = 2.3454, lr_0 = 1.2287e-04
Loss = 4.4022e-01, PNorm = 65.8338, GNorm = 3.1168, lr_0 = 1.2663e-04
Loss = 4.1648e-01, PNorm = 65.8421, GNorm = 1.7903, lr_0 = 1.3038e-04
Loss = 3.2338e-01, PNorm = 65.8510, GNorm = 1.8913, lr_0 = 1.3413e-04
Loss = 3.7683e-01, PNorm = 65.8585, GNorm = 1.9083, lr_0 = 1.3788e-04
Loss = 3.5788e-01, PNorm = 65.8659, GNorm = 2.0090, lr_0 = 1.4163e-04
Loss = 4.0295e-01, PNorm = 65.8766, GNorm = 2.1057, lr_0 = 1.4537e-04
Loss = 3.6049e-01, PNorm = 65.8892, GNorm = 2.1108, lr_0 = 1.4913e-04
Loss = 3.8401e-01, PNorm = 65.9008, GNorm = 1.7370, lr_0 = 1.5288e-04
Loss = 3.6043e-01, PNorm = 65.9111, GNorm = 2.1964, lr_0 = 1.5662e-04
Loss = 3.2577e-01, PNorm = 65.9207, GNorm = 1.7851, lr_0 = 1.6038e-04
Loss = 3.7719e-01, PNorm = 65.9342, GNorm = 1.9705, lr_0 = 1.6412e-04
Loss = 3.5248e-01, PNorm = 65.9485, GNorm = 1.9882, lr_0 = 1.6788e-04
Loss = 3.3160e-01, PNorm = 65.9608, GNorm = 1.6084, lr_0 = 1.7163e-04
Loss = 3.3897e-01, PNorm = 65.9737, GNorm = 2.2557, lr_0 = 1.7538e-04
Loss = 3.6917e-01, PNorm = 65.9849, GNorm = 2.6487, lr_0 = 1.7913e-04
Loss = 3.2146e-01, PNorm = 65.9986, GNorm = 1.9178, lr_0 = 1.8288e-04
Loss = 3.1193e-01, PNorm = 66.0155, GNorm = 3.0080, lr_0 = 1.8662e-04
Loss = 3.4992e-01, PNorm = 66.0291, GNorm = 1.6567, lr_0 = 1.9038e-04
Loss = 3.3960e-01, PNorm = 66.0422, GNorm = 1.5758, lr_0 = 1.9413e-04
Loss = 3.5627e-01, PNorm = 66.0596, GNorm = 1.9537, lr_0 = 1.9788e-04
Loss = 3.4218e-01, PNorm = 66.0769, GNorm = 1.5494, lr_0 = 2.0163e-04
Loss = 3.2838e-01, PNorm = 66.0929, GNorm = 2.1463, lr_0 = 2.0537e-04
Loss = 3.5021e-01, PNorm = 66.1065, GNorm = 3.2002, lr_0 = 2.0913e-04
Loss = 3.0106e-01, PNorm = 66.1236, GNorm = 2.4353, lr_0 = 2.1288e-04
Loss = 3.1024e-01, PNorm = 66.1397, GNorm = 1.8750, lr_0 = 2.1663e-04
Loss = 3.2465e-01, PNorm = 66.1552, GNorm = 1.4999, lr_0 = 2.2038e-04
Loss = 3.5037e-01, PNorm = 66.1704, GNorm = 2.8597, lr_0 = 2.2412e-04
Loss = 3.4273e-01, PNorm = 66.1875, GNorm = 1.4480, lr_0 = 2.2787e-04
Loss = 3.5406e-01, PNorm = 66.2093, GNorm = 1.8396, lr_0 = 2.3163e-04
Loss = 3.0876e-01, PNorm = 66.2278, GNorm = 1.8541, lr_0 = 2.3538e-04
Loss = 3.4568e-01, PNorm = 66.2470, GNorm = 1.4936, lr_0 = 2.3913e-04
Loss = 3.1939e-01, PNorm = 66.2647, GNorm = 1.6746, lr_0 = 2.4288e-04
Loss = 3.4378e-01, PNorm = 66.2868, GNorm = 1.7618, lr_0 = 2.4662e-04
Loss = 3.0237e-01, PNorm = 66.3076, GNorm = 1.8865, lr_0 = 2.5038e-04
Loss = 2.9517e-01, PNorm = 66.3268, GNorm = 2.2346, lr_0 = 2.5413e-04
Loss = 3.1743e-01, PNorm = 66.3490, GNorm = 1.7702, lr_0 = 2.5788e-04
Loss = 3.2022e-01, PNorm = 66.3737, GNorm = 1.6232, lr_0 = 2.6163e-04
Loss = 3.1063e-01, PNorm = 66.3974, GNorm = 1.4465, lr_0 = 2.6537e-04
Loss = 3.1109e-01, PNorm = 66.4211, GNorm = 1.5354, lr_0 = 2.6912e-04
Loss = 2.8093e-01, PNorm = 66.4432, GNorm = 1.4173, lr_0 = 2.7288e-04
Loss = 2.8292e-01, PNorm = 66.4650, GNorm = 1.0860, lr_0 = 2.7663e-04
Loss = 3.2625e-01, PNorm = 66.4863, GNorm = 1.2193, lr_0 = 2.8038e-04
Loss = 2.8404e-01, PNorm = 66.5072, GNorm = 1.0873, lr_0 = 2.8413e-04
Loss = 2.7331e-01, PNorm = 66.5315, GNorm = 1.4083, lr_0 = 2.8787e-04
Loss = 2.9605e-01, PNorm = 66.5583, GNorm = 1.6873, lr_0 = 2.9163e-04
Loss = 2.8210e-01, PNorm = 66.5797, GNorm = 1.1535, lr_0 = 2.9538e-04
Loss = 2.8056e-01, PNorm = 66.6040, GNorm = 1.4856, lr_0 = 2.9913e-04
Loss = 2.9624e-01, PNorm = 66.6308, GNorm = 1.5209, lr_0 = 3.0288e-04
Loss = 2.9281e-01, PNorm = 66.6543, GNorm = 1.2219, lr_0 = 3.0662e-04
Loss = 3.3099e-01, PNorm = 66.6822, GNorm = 1.5018, lr_0 = 3.1037e-04
Loss = 2.7574e-01, PNorm = 66.7125, GNorm = 1.0407, lr_0 = 3.1413e-04
Loss = 2.9998e-01, PNorm = 66.7444, GNorm = 1.7140, lr_0 = 3.1788e-04
Loss = 2.9347e-01, PNorm = 66.7739, GNorm = 1.1903, lr_0 = 3.2163e-04
Loss = 2.8169e-01, PNorm = 66.8042, GNorm = 1.2077, lr_0 = 3.2538e-04
Loss = 2.8582e-01, PNorm = 66.8366, GNorm = 1.4670, lr_0 = 3.2912e-04
Loss = 3.1330e-01, PNorm = 66.8649, GNorm = 1.5442, lr_0 = 3.3288e-04
Loss = 2.9818e-01, PNorm = 66.8963, GNorm = 1.1408, lr_0 = 3.3663e-04
Loss = 2.7743e-01, PNorm = 66.9277, GNorm = 1.1685, lr_0 = 3.4038e-04
Loss = 2.7130e-01, PNorm = 66.9557, GNorm = 1.2591, lr_0 = 3.4413e-04
Loss = 2.6978e-01, PNorm = 66.9866, GNorm = 1.2415, lr_0 = 3.4787e-04
Loss = 2.9229e-01, PNorm = 67.0189, GNorm = 0.9685, lr_0 = 3.5162e-04
Loss = 3.0132e-01, PNorm = 67.0538, GNorm = 1.0396, lr_0 = 3.5538e-04
Loss = 2.8075e-01, PNorm = 67.0930, GNorm = 1.5450, lr_0 = 3.5913e-04
Loss = 2.5844e-01, PNorm = 67.1253, GNorm = 1.1419, lr_0 = 3.6288e-04
Loss = 2.5302e-01, PNorm = 67.1597, GNorm = 1.3475, lr_0 = 3.6662e-04
Loss = 2.6064e-01, PNorm = 67.1942, GNorm = 1.7187, lr_0 = 3.7037e-04
Loss = 2.5930e-01, PNorm = 67.2309, GNorm = 0.8894, lr_0 = 3.7413e-04
Loss = 3.0879e-01, PNorm = 67.2721, GNorm = 1.4722, lr_0 = 3.7788e-04
Loss = 2.5672e-01, PNorm = 67.3044, GNorm = 1.0175, lr_0 = 3.8163e-04
Loss = 2.3940e-01, PNorm = 67.3443, GNorm = 1.2355, lr_0 = 3.8537e-04
Loss = 3.0109e-01, PNorm = 67.3725, GNorm = 1.3751, lr_0 = 3.8912e-04
Loss = 2.5647e-01, PNorm = 67.4127, GNorm = 1.1400, lr_0 = 3.9287e-04
Loss = 2.5494e-01, PNorm = 67.4469, GNorm = 1.0278, lr_0 = 3.9663e-04
Loss = 2.7029e-01, PNorm = 67.4855, GNorm = 1.1734, lr_0 = 4.0038e-04
Loss = 2.8094e-01, PNorm = 67.5288, GNorm = 1.2260, lr_0 = 4.0413e-04
Loss = 2.9099e-01, PNorm = 67.5624, GNorm = 1.1843, lr_0 = 4.0787e-04
Loss = 2.5923e-01, PNorm = 67.6110, GNorm = 1.0846, lr_0 = 4.1162e-04
Loss = 2.6948e-01, PNorm = 67.6480, GNorm = 1.6469, lr_0 = 4.1537e-04
Loss = 2.6634e-01, PNorm = 67.6876, GNorm = 1.4988, lr_0 = 4.1913e-04
Loss = 2.9187e-01, PNorm = 67.7317, GNorm = 1.4215, lr_0 = 4.2288e-04
Loss = 2.6300e-01, PNorm = 67.7777, GNorm = 1.4264, lr_0 = 4.2662e-04
Loss = 3.0080e-01, PNorm = 67.8196, GNorm = 1.1683, lr_0 = 4.3037e-04
Loss = 2.6588e-01, PNorm = 67.8694, GNorm = 1.0412, lr_0 = 4.3412e-04
Loss = 2.5495e-01, PNorm = 67.9146, GNorm = 1.1204, lr_0 = 4.3788e-04
Loss = 3.0281e-01, PNorm = 67.9630, GNorm = 1.8247, lr_0 = 4.4163e-04
Loss = 2.2454e-01, PNorm = 68.0162, GNorm = 0.9850, lr_0 = 4.4538e-04
Loss = 2.5759e-01, PNorm = 68.0634, GNorm = 0.8403, lr_0 = 4.4912e-04
Loss = 2.5794e-01, PNorm = 68.1150, GNorm = 1.2145, lr_0 = 4.5287e-04
Loss = 2.7682e-01, PNorm = 68.1639, GNorm = 1.2662, lr_0 = 4.5662e-04
Loss = 2.3111e-01, PNorm = 68.2141, GNorm = 1.1293, lr_0 = 4.6038e-04
Loss = 2.4551e-01, PNorm = 68.2657, GNorm = 1.0210, lr_0 = 4.6413e-04
Loss = 2.6897e-01, PNorm = 68.3116, GNorm = 0.9036, lr_0 = 4.6787e-04
Loss = 2.1965e-01, PNorm = 68.3616, GNorm = 0.7919, lr_0 = 4.7162e-04
Loss = 2.6924e-01, PNorm = 68.4069, GNorm = 1.1382, lr_0 = 4.7537e-04
Loss = 2.5945e-01, PNorm = 68.4622, GNorm = 0.9912, lr_0 = 4.7913e-04
Loss = 2.8718e-01, PNorm = 68.5143, GNorm = 0.9458, lr_0 = 4.8288e-04
Loss = 2.2238e-01, PNorm = 68.5713, GNorm = 1.4655, lr_0 = 4.8663e-04
Loss = 2.2772e-01, PNorm = 68.6238, GNorm = 1.3167, lr_0 = 4.9038e-04
Loss = 2.5360e-01, PNorm = 68.6698, GNorm = 0.7592, lr_0 = 4.9412e-04
Loss = 2.6173e-01, PNorm = 68.7236, GNorm = 0.9571, lr_0 = 4.9788e-04
Loss = 2.5972e-01, PNorm = 68.7778, GNorm = 1.1152, lr_0 = 5.0163e-04
Loss = 2.6896e-01, PNorm = 68.8261, GNorm = 0.9856, lr_0 = 5.0538e-04
Loss = 2.6862e-01, PNorm = 68.8869, GNorm = 1.6509, lr_0 = 5.0913e-04
Loss = 2.4255e-01, PNorm = 68.9429, GNorm = 1.0539, lr_0 = 5.1287e-04
Loss = 2.6945e-01, PNorm = 69.0036, GNorm = 1.0277, lr_0 = 5.1663e-04
Loss = 2.3579e-01, PNorm = 69.0582, GNorm = 1.0867, lr_0 = 5.2038e-04
Loss = 2.7239e-01, PNorm = 69.1082, GNorm = 1.1659, lr_0 = 5.2413e-04
Loss = 2.7956e-01, PNorm = 69.1714, GNorm = 1.3251, lr_0 = 5.2788e-04
Loss = 2.5545e-01, PNorm = 69.2366, GNorm = 1.0697, lr_0 = 5.3162e-04
Loss = 2.2548e-01, PNorm = 69.2988, GNorm = 0.9031, lr_0 = 5.3538e-04
Loss = 2.3457e-01, PNorm = 69.3562, GNorm = 1.1057, lr_0 = 5.3912e-04
Loss = 2.4432e-01, PNorm = 69.4132, GNorm = 1.0289, lr_0 = 5.4288e-04
Loss = 2.7626e-01, PNorm = 69.4777, GNorm = 0.9245, lr_0 = 5.4663e-04
Loss = 2.2527e-01, PNorm = 69.5438, GNorm = 0.7365, lr_0 = 5.5038e-04
Validation mae = 0.320250
Epoch 1
Loss = 1.6252e-01, PNorm = 69.6003, GNorm = 0.7912, lr_0 = 5.5413e-04
Loss = 1.7289e-01, PNorm = 69.6563, GNorm = 0.9471, lr_0 = 5.5787e-04
Loss = 1.7336e-01, PNorm = 69.7078, GNorm = 0.9645, lr_0 = 5.6163e-04
Loss = 1.8144e-01, PNorm = 69.7645, GNorm = 0.8302, lr_0 = 5.6538e-04
Loss = 1.9790e-01, PNorm = 69.8258, GNorm = 0.9073, lr_0 = 5.6913e-04
Loss = 1.6551e-01, PNorm = 69.8900, GNorm = 0.7031, lr_0 = 5.7288e-04
Loss = 1.7319e-01, PNorm = 69.9475, GNorm = 1.3867, lr_0 = 5.7662e-04
Loss = 2.1190e-01, PNorm = 70.0155, GNorm = 1.3026, lr_0 = 5.8038e-04
Loss = 1.9107e-01, PNorm = 70.0683, GNorm = 1.0059, lr_0 = 5.8413e-04
Loss = 2.0385e-01, PNorm = 70.1443, GNorm = 1.0143, lr_0 = 5.8788e-04
Loss = 1.6140e-01, PNorm = 70.2095, GNorm = 0.8322, lr_0 = 5.9163e-04
Loss = 1.7010e-01, PNorm = 70.2722, GNorm = 0.7628, lr_0 = 5.9538e-04
Loss = 1.7674e-01, PNorm = 70.3337, GNorm = 1.3040, lr_0 = 5.9913e-04
Loss = 1.6306e-01, PNorm = 70.4059, GNorm = 0.8150, lr_0 = 6.0288e-04
Loss = 1.7963e-01, PNorm = 70.4791, GNorm = 0.8588, lr_0 = 6.0663e-04
Loss = 1.7493e-01, PNorm = 70.5521, GNorm = 0.8117, lr_0 = 6.1038e-04
Loss = 1.5330e-01, PNorm = 70.6246, GNorm = 0.7075, lr_0 = 6.1413e-04
Loss = 1.6020e-01, PNorm = 70.6869, GNorm = 1.0277, lr_0 = 6.1788e-04
Loss = 1.7198e-01, PNorm = 70.7667, GNorm = 0.8578, lr_0 = 6.2163e-04
Loss = 1.8015e-01, PNorm = 70.8459, GNorm = 0.9091, lr_0 = 6.2538e-04
Loss = 1.7076e-01, PNorm = 70.9289, GNorm = 0.8623, lr_0 = 6.2913e-04
Loss = 2.2167e-01, PNorm = 71.0207, GNorm = 1.2749, lr_0 = 6.3288e-04
Loss = 1.9183e-01, PNorm = 71.1109, GNorm = 0.8639, lr_0 = 6.3663e-04
Loss = 1.9322e-01, PNorm = 71.1979, GNorm = 1.5088, lr_0 = 6.4038e-04
Loss = 1.7543e-01, PNorm = 71.2864, GNorm = 0.9746, lr_0 = 6.4413e-04
Loss = 1.8180e-01, PNorm = 71.3686, GNorm = 0.8683, lr_0 = 6.4788e-04
Loss = 1.8766e-01, PNorm = 71.4600, GNorm = 0.8133, lr_0 = 6.5163e-04
Loss = 1.6916e-01, PNorm = 71.5437, GNorm = 0.7273, lr_0 = 6.5538e-04
Loss = 1.9924e-01, PNorm = 71.6324, GNorm = 0.7585, lr_0 = 6.5913e-04
Loss = 1.7478e-01, PNorm = 71.7193, GNorm = 0.7754, lr_0 = 6.6288e-04
Loss = 1.7814e-01, PNorm = 71.8059, GNorm = 0.9735, lr_0 = 6.6663e-04
Loss = 1.9101e-01, PNorm = 71.8922, GNorm = 0.6041, lr_0 = 6.7038e-04
Loss = 1.8695e-01, PNorm = 71.9771, GNorm = 0.7084, lr_0 = 6.7413e-04
Loss = 1.9865e-01, PNorm = 72.0867, GNorm = 0.9080, lr_0 = 6.7788e-04
Loss = 1.7998e-01, PNorm = 72.1835, GNorm = 0.7477, lr_0 = 6.8163e-04
Loss = 1.8164e-01, PNorm = 72.2744, GNorm = 0.8631, lr_0 = 6.8538e-04
Loss = 1.9203e-01, PNorm = 72.3613, GNorm = 0.9126, lr_0 = 6.8913e-04
Loss = 1.8182e-01, PNorm = 72.4554, GNorm = 0.7830, lr_0 = 6.9288e-04
Loss = 2.1146e-01, PNorm = 72.5506, GNorm = 0.7730, lr_0 = 6.9663e-04
Loss = 1.8382e-01, PNorm = 72.6465, GNorm = 0.9146, lr_0 = 7.0038e-04
Loss = 1.8725e-01, PNorm = 72.7484, GNorm = 0.6880, lr_0 = 7.0413e-04
Loss = 2.0929e-01, PNorm = 72.8504, GNorm = 1.0459, lr_0 = 7.0788e-04
Loss = 1.9344e-01, PNorm = 72.9718, GNorm = 0.8707, lr_0 = 7.1163e-04
Loss = 2.0372e-01, PNorm = 73.0705, GNorm = 1.1885, lr_0 = 7.1538e-04
Loss = 1.9815e-01, PNorm = 73.1937, GNorm = 0.6772, lr_0 = 7.1913e-04
Loss = 1.7811e-01, PNorm = 73.3101, GNorm = 0.9940, lr_0 = 7.2288e-04
Loss = 1.8126e-01, PNorm = 73.4165, GNorm = 0.9640, lr_0 = 7.2663e-04
Loss = 2.1117e-01, PNorm = 73.5388, GNorm = 1.2845, lr_0 = 7.3038e-04
Loss = 1.9688e-01, PNorm = 73.6562, GNorm = 1.1049, lr_0 = 7.3413e-04
Loss = 2.0151e-01, PNorm = 73.7886, GNorm = 0.8030, lr_0 = 7.3788e-04
Loss = 1.8353e-01, PNorm = 73.8996, GNorm = 0.8180, lr_0 = 7.4163e-04
Loss = 2.2031e-01, PNorm = 74.0273, GNorm = 1.1934, lr_0 = 7.4538e-04
Loss = 2.0646e-01, PNorm = 74.1443, GNorm = 0.9439, lr_0 = 7.4913e-04
Loss = 2.0635e-01, PNorm = 74.2747, GNorm = 1.1840, lr_0 = 7.5288e-04
Loss = 1.9371e-01, PNorm = 74.3930, GNorm = 1.0698, lr_0 = 7.5663e-04
Loss = 2.1022e-01, PNorm = 74.5186, GNorm = 0.8488, lr_0 = 7.6038e-04
Loss = 1.8375e-01, PNorm = 74.6276, GNorm = 1.0822, lr_0 = 7.6413e-04
Loss = 2.1985e-01, PNorm = 74.7337, GNorm = 0.7995, lr_0 = 7.6788e-04
Loss = 1.9111e-01, PNorm = 74.8544, GNorm = 0.7185, lr_0 = 7.7163e-04
Loss = 2.2457e-01, PNorm = 74.9715, GNorm = 0.9482, lr_0 = 7.7538e-04
Loss = 2.3974e-01, PNorm = 75.0989, GNorm = 0.8590, lr_0 = 7.7913e-04
Loss = 1.8663e-01, PNorm = 75.2337, GNorm = 0.8684, lr_0 = 7.8288e-04
Loss = 1.9018e-01, PNorm = 75.3482, GNorm = 1.4419, lr_0 = 7.8663e-04
Loss = 1.9371e-01, PNorm = 75.4680, GNorm = 0.6687, lr_0 = 7.9038e-04
Loss = 1.9949e-01, PNorm = 75.5875, GNorm = 1.1515, lr_0 = 7.9413e-04
Loss = 1.8709e-01, PNorm = 75.7058, GNorm = 0.7379, lr_0 = 7.9788e-04
Loss = 2.0349e-01, PNorm = 75.8257, GNorm = 1.0516, lr_0 = 8.0163e-04
Loss = 1.8564e-01, PNorm = 75.9364, GNorm = 0.7313, lr_0 = 8.0538e-04
Loss = 2.3934e-01, PNorm = 76.0504, GNorm = 0.9172, lr_0 = 8.0913e-04
Loss = 1.9528e-01, PNorm = 76.1825, GNorm = 0.8140, lr_0 = 8.1288e-04
Loss = 2.1776e-01, PNorm = 76.2934, GNorm = 0.6573, lr_0 = 8.1663e-04
Loss = 2.0988e-01, PNorm = 76.4323, GNorm = 0.7801, lr_0 = 8.2038e-04
Loss = 1.8156e-01, PNorm = 76.5510, GNorm = 0.9694, lr_0 = 8.2413e-04
Loss = 1.9802e-01, PNorm = 76.6712, GNorm = 0.9549, lr_0 = 8.2788e-04
Loss = 1.9669e-01, PNorm = 76.7994, GNorm = 0.8355, lr_0 = 8.3163e-04
Loss = 1.9563e-01, PNorm = 76.9440, GNorm = 0.7344, lr_0 = 8.3538e-04
Loss = 2.4230e-01, PNorm = 77.0655, GNorm = 0.9287, lr_0 = 8.3913e-04
Loss = 2.1536e-01, PNorm = 77.2189, GNorm = 1.0534, lr_0 = 8.4288e-04
Loss = 2.4613e-01, PNorm = 77.3654, GNorm = 0.7721, lr_0 = 8.4663e-04
Loss = 2.2201e-01, PNorm = 77.5282, GNorm = 1.2414, lr_0 = 8.5038e-04
Loss = 2.1339e-01, PNorm = 77.6676, GNorm = 0.7048, lr_0 = 8.5413e-04
Loss = 2.0924e-01, PNorm = 77.8132, GNorm = 0.5804, lr_0 = 8.5788e-04
Loss = 2.0170e-01, PNorm = 77.9311, GNorm = 0.9691, lr_0 = 8.6163e-04
Loss = 1.8059e-01, PNorm = 78.0642, GNorm = 1.0452, lr_0 = 8.6538e-04
Loss = 2.2007e-01, PNorm = 78.1793, GNorm = 0.9210, lr_0 = 8.6913e-04
Loss = 2.0002e-01, PNorm = 78.3123, GNorm = 0.6625, lr_0 = 8.7288e-04
Loss = 2.0888e-01, PNorm = 78.4428, GNorm = 0.7846, lr_0 = 8.7663e-04
Loss = 2.2127e-01, PNorm = 78.5839, GNorm = 1.5275, lr_0 = 8.8038e-04
Loss = 2.0687e-01, PNorm = 78.7168, GNorm = 1.1682, lr_0 = 8.8413e-04
Loss = 2.1678e-01, PNorm = 78.8480, GNorm = 0.8766, lr_0 = 8.8788e-04
Loss = 2.0553e-01, PNorm = 79.0020, GNorm = 0.6944, lr_0 = 8.9163e-04
Loss = 2.0131e-01, PNorm = 79.1173, GNorm = 0.7180, lr_0 = 8.9538e-04
Loss = 2.1507e-01, PNorm = 79.2555, GNorm = 0.8423, lr_0 = 8.9913e-04
Loss = 2.0917e-01, PNorm = 79.3768, GNorm = 0.5502, lr_0 = 9.0288e-04
Loss = 2.1201e-01, PNorm = 79.5172, GNorm = 0.8310, lr_0 = 9.0663e-04
Loss = 1.9008e-01, PNorm = 79.6463, GNorm = 0.7480, lr_0 = 9.1038e-04
Loss = 2.0724e-01, PNorm = 79.7690, GNorm = 0.9848, lr_0 = 9.1413e-04
Loss = 2.2808e-01, PNorm = 79.9004, GNorm = 1.9190, lr_0 = 9.1788e-04
Loss = 2.4128e-01, PNorm = 80.0390, GNorm = 0.7909, lr_0 = 9.2163e-04
Loss = 2.4806e-01, PNorm = 80.1852, GNorm = 1.1004, lr_0 = 9.2538e-04
Loss = 2.1643e-01, PNorm = 80.3261, GNorm = 0.7708, lr_0 = 9.2913e-04
Loss = 2.0424e-01, PNorm = 80.4627, GNorm = 1.0938, lr_0 = 9.3288e-04
Loss = 2.0301e-01, PNorm = 80.5935, GNorm = 0.6732, lr_0 = 9.3663e-04
Loss = 2.1211e-01, PNorm = 80.7359, GNorm = 0.6249, lr_0 = 9.4038e-04
Loss = 2.1871e-01, PNorm = 80.8706, GNorm = 0.7903, lr_0 = 9.4413e-04
Loss = 2.1398e-01, PNorm = 81.0169, GNorm = 0.8272, lr_0 = 9.4788e-04
Loss = 2.1455e-01, PNorm = 81.1570, GNorm = 1.1148, lr_0 = 9.5163e-04
Loss = 1.9903e-01, PNorm = 81.3092, GNorm = 0.8826, lr_0 = 9.5538e-04
Loss = 2.1794e-01, PNorm = 81.4656, GNorm = 0.6568, lr_0 = 9.5913e-04
Loss = 2.0005e-01, PNorm = 81.6329, GNorm = 0.8372, lr_0 = 9.6288e-04
Loss = 2.4023e-01, PNorm = 81.7736, GNorm = 1.2470, lr_0 = 9.6663e-04
Loss = 2.1049e-01, PNorm = 81.9265, GNorm = 0.6149, lr_0 = 9.7038e-04
Loss = 1.9583e-01, PNorm = 82.0612, GNorm = 0.7420, lr_0 = 9.7413e-04
Loss = 2.3142e-01, PNorm = 82.2104, GNorm = 1.0909, lr_0 = 9.7788e-04
Loss = 2.3847e-01, PNorm = 82.3703, GNorm = 1.1853, lr_0 = 9.8163e-04
Loss = 2.0742e-01, PNorm = 82.5221, GNorm = 0.7833, lr_0 = 9.8537e-04
Loss = 2.1873e-01, PNorm = 82.6645, GNorm = 0.7428, lr_0 = 9.8912e-04
Loss = 2.0874e-01, PNorm = 82.8156, GNorm = 0.7279, lr_0 = 9.9288e-04
Loss = 2.2926e-01, PNorm = 82.9578, GNorm = 0.9554, lr_0 = 9.9663e-04
Loss = 2.0461e-01, PNorm = 83.1120, GNorm = 0.7247, lr_0 = 9.9993e-04
Validation mae = 0.309422
Epoch 2
Loss = 1.2969e-01, PNorm = 83.2516, GNorm = 0.4573, lr_0 = 9.9925e-04
Loss = 1.4252e-01, PNorm = 83.3819, GNorm = 0.8431, lr_0 = 9.9856e-04
Loss = 1.4652e-01, PNorm = 83.4934, GNorm = 0.6452, lr_0 = 9.9788e-04
Loss = 1.4537e-01, PNorm = 83.6198, GNorm = 0.5253, lr_0 = 9.9719e-04
Loss = 1.2307e-01, PNorm = 83.7364, GNorm = 0.5487, lr_0 = 9.9651e-04
Loss = 1.2430e-01, PNorm = 83.8513, GNorm = 0.5262, lr_0 = 9.9583e-04
Loss = 1.2629e-01, PNorm = 83.9604, GNorm = 0.6570, lr_0 = 9.9515e-04
Loss = 1.2442e-01, PNorm = 84.0865, GNorm = 0.8671, lr_0 = 9.9446e-04
Loss = 1.2687e-01, PNorm = 84.2043, GNorm = 0.7331, lr_0 = 9.9378e-04
Loss = 1.2472e-01, PNorm = 84.3303, GNorm = 0.7979, lr_0 = 9.9310e-04
Loss = 1.0852e-01, PNorm = 84.4617, GNorm = 0.5659, lr_0 = 9.9242e-04
Loss = 1.2672e-01, PNorm = 84.5778, GNorm = 0.4749, lr_0 = 9.9174e-04
Loss = 1.4285e-01, PNorm = 84.7042, GNorm = 0.9118, lr_0 = 9.9106e-04
Loss = 1.2016e-01, PNorm = 84.8283, GNorm = 0.7086, lr_0 = 9.9038e-04
Loss = 1.3635e-01, PNorm = 84.9560, GNorm = 1.1078, lr_0 = 9.8971e-04
Loss = 1.0821e-01, PNorm = 85.0802, GNorm = 0.6262, lr_0 = 9.8903e-04
Loss = 1.3397e-01, PNorm = 85.1879, GNorm = 0.8909, lr_0 = 9.8835e-04
Loss = 1.5103e-01, PNorm = 85.3036, GNorm = 0.8683, lr_0 = 9.8767e-04
Loss = 1.1104e-01, PNorm = 85.4302, GNorm = 0.5694, lr_0 = 9.8700e-04
Loss = 1.1976e-01, PNorm = 85.5494, GNorm = 0.5718, lr_0 = 9.8632e-04
Loss = 1.2230e-01, PNorm = 85.6634, GNorm = 0.6905, lr_0 = 9.8564e-04
Loss = 1.3054e-01, PNorm = 85.7900, GNorm = 0.7974, lr_0 = 9.8497e-04
Loss = 1.4310e-01, PNorm = 85.9232, GNorm = 0.7509, lr_0 = 9.8429e-04
Loss = 1.1341e-01, PNorm = 86.0704, GNorm = 0.6398, lr_0 = 9.8362e-04
Loss = 1.3307e-01, PNorm = 86.1979, GNorm = 0.6332, lr_0 = 9.8295e-04
Loss = 1.4119e-01, PNorm = 86.3418, GNorm = 0.7454, lr_0 = 9.8227e-04
Loss = 1.3617e-01, PNorm = 86.4714, GNorm = 0.6149, lr_0 = 9.8160e-04
Loss = 1.4410e-01, PNorm = 86.6063, GNorm = 1.0036, lr_0 = 9.8093e-04
Loss = 1.3949e-01, PNorm = 86.7397, GNorm = 0.6921, lr_0 = 9.8026e-04
Loss = 1.1205e-01, PNorm = 86.8718, GNorm = 0.5668, lr_0 = 9.7958e-04
Loss = 1.3125e-01, PNorm = 86.9850, GNorm = 0.6817, lr_0 = 9.7891e-04
Loss = 1.3218e-01, PNorm = 87.1157, GNorm = 0.5557, lr_0 = 9.7824e-04
Loss = 1.3739e-01, PNorm = 87.2506, GNorm = 1.1856, lr_0 = 9.7757e-04
Loss = 1.3845e-01, PNorm = 87.3691, GNorm = 0.9098, lr_0 = 9.7690e-04
Loss = 1.4219e-01, PNorm = 87.5058, GNorm = 1.0924, lr_0 = 9.7623e-04
Loss = 1.3762e-01, PNorm = 87.6215, GNorm = 0.7193, lr_0 = 9.7556e-04
Loss = 1.2370e-01, PNorm = 87.7598, GNorm = 0.6835, lr_0 = 9.7490e-04
Loss = 1.3028e-01, PNorm = 87.8664, GNorm = 1.0489, lr_0 = 9.7423e-04
Loss = 1.3288e-01, PNorm = 87.9802, GNorm = 0.8008, lr_0 = 9.7356e-04
Loss = 1.2933e-01, PNorm = 88.0982, GNorm = 0.4769, lr_0 = 9.7289e-04
Loss = 1.2906e-01, PNorm = 88.2136, GNorm = 0.5289, lr_0 = 9.7223e-04
Loss = 1.2829e-01, PNorm = 88.3414, GNorm = 0.4890, lr_0 = 9.7156e-04
Loss = 1.2707e-01, PNorm = 88.4599, GNorm = 0.9065, lr_0 = 9.7090e-04
Loss = 1.3128e-01, PNorm = 88.5854, GNorm = 0.5755, lr_0 = 9.7023e-04
Loss = 1.4462e-01, PNorm = 88.7022, GNorm = 0.6655, lr_0 = 9.6957e-04
Loss = 1.3501e-01, PNorm = 88.8322, GNorm = 0.6305, lr_0 = 9.6890e-04
Loss = 1.2814e-01, PNorm = 88.9662, GNorm = 0.5893, lr_0 = 9.6824e-04
Loss = 1.3553e-01, PNorm = 89.0885, GNorm = 0.6769, lr_0 = 9.6757e-04
Loss = 1.2298e-01, PNorm = 89.2031, GNorm = 0.6598, lr_0 = 9.6691e-04
Loss = 1.3860e-01, PNorm = 89.3244, GNorm = 0.5773, lr_0 = 9.6625e-04
Loss = 1.4922e-01, PNorm = 89.4494, GNorm = 0.6747, lr_0 = 9.6559e-04
Loss = 1.5034e-01, PNorm = 89.5830, GNorm = 0.8075, lr_0 = 9.6493e-04
Loss = 1.3750e-01, PNorm = 89.7139, GNorm = 0.6788, lr_0 = 9.6427e-04
Loss = 1.2986e-01, PNorm = 89.8399, GNorm = 0.7926, lr_0 = 9.6360e-04
Loss = 1.2880e-01, PNorm = 89.9601, GNorm = 0.6963, lr_0 = 9.6294e-04
Loss = 1.3098e-01, PNorm = 90.0799, GNorm = 0.4314, lr_0 = 9.6228e-04
Loss = 1.3515e-01, PNorm = 90.2116, GNorm = 0.9894, lr_0 = 9.6163e-04
Loss = 1.4726e-01, PNorm = 90.3337, GNorm = 0.8484, lr_0 = 9.6097e-04
Loss = 1.3881e-01, PNorm = 90.4575, GNorm = 0.8000, lr_0 = 9.6031e-04
Loss = 1.2012e-01, PNorm = 90.5821, GNorm = 0.5603, lr_0 = 9.5965e-04
Loss = 1.2721e-01, PNorm = 90.6910, GNorm = 0.4168, lr_0 = 9.5899e-04
Loss = 1.2648e-01, PNorm = 90.8019, GNorm = 0.7441, lr_0 = 9.5834e-04
Loss = 1.5126e-01, PNorm = 90.9071, GNorm = 0.7168, lr_0 = 9.5768e-04
Loss = 1.2681e-01, PNorm = 91.0317, GNorm = 0.6835, lr_0 = 9.5702e-04
Loss = 1.3881e-01, PNorm = 91.1459, GNorm = 0.6702, lr_0 = 9.5637e-04
Loss = 1.5332e-01, PNorm = 91.2991, GNorm = 0.7362, lr_0 = 9.5571e-04
Loss = 1.5962e-01, PNorm = 91.4304, GNorm = 0.6409, lr_0 = 9.5506e-04
Loss = 1.3426e-01, PNorm = 91.5879, GNorm = 0.6977, lr_0 = 9.5440e-04
Loss = 1.3672e-01, PNorm = 91.7256, GNorm = 0.6847, lr_0 = 9.5375e-04
Loss = 1.3311e-01, PNorm = 91.8528, GNorm = 0.4986, lr_0 = 9.5310e-04
Loss = 1.2492e-01, PNorm = 91.9748, GNorm = 0.8336, lr_0 = 9.5244e-04
Loss = 1.3135e-01, PNorm = 92.1068, GNorm = 0.7458, lr_0 = 9.5179e-04
Loss = 1.3018e-01, PNorm = 92.2298, GNorm = 0.5136, lr_0 = 9.5114e-04
Loss = 1.2797e-01, PNorm = 92.3528, GNorm = 0.5178, lr_0 = 9.5049e-04
Loss = 1.4384e-01, PNorm = 92.4756, GNorm = 0.5729, lr_0 = 9.4984e-04
Loss = 1.3172e-01, PNorm = 92.6065, GNorm = 1.2460, lr_0 = 9.4919e-04
Loss = 1.3394e-01, PNorm = 92.7217, GNorm = 0.5979, lr_0 = 9.4854e-04
Loss = 1.4501e-01, PNorm = 92.8478, GNorm = 1.4390, lr_0 = 9.4789e-04
Loss = 1.4457e-01, PNorm = 92.9792, GNorm = 0.5849, lr_0 = 9.4724e-04
Loss = 1.2863e-01, PNorm = 93.1159, GNorm = 0.5020, lr_0 = 9.4659e-04
Loss = 1.7117e-01, PNorm = 93.2476, GNorm = 0.6501, lr_0 = 9.4594e-04
Loss = 1.3449e-01, PNorm = 93.3751, GNorm = 0.5004, lr_0 = 9.4529e-04
Loss = 1.4633e-01, PNorm = 93.5006, GNorm = 0.5992, lr_0 = 9.4464e-04
Loss = 1.6428e-01, PNorm = 93.6329, GNorm = 1.2222, lr_0 = 9.4400e-04
Loss = 1.4497e-01, PNorm = 93.7665, GNorm = 1.0958, lr_0 = 9.4335e-04
Loss = 1.3749e-01, PNorm = 93.8940, GNorm = 0.9937, lr_0 = 9.4270e-04
Loss = 1.3902e-01, PNorm = 94.0276, GNorm = 0.5742, lr_0 = 9.4206e-04
Loss = 1.5614e-01, PNorm = 94.1556, GNorm = 0.9593, lr_0 = 9.4141e-04
Loss = 1.4126e-01, PNorm = 94.2833, GNorm = 0.6187, lr_0 = 9.4077e-04
Loss = 1.5846e-01, PNorm = 94.4290, GNorm = 0.6881, lr_0 = 9.4012e-04
Loss = 1.4542e-01, PNorm = 94.5595, GNorm = 0.6686, lr_0 = 9.3948e-04
Loss = 1.5876e-01, PNorm = 94.7113, GNorm = 0.6905, lr_0 = 9.3884e-04
Loss = 1.4548e-01, PNorm = 94.8384, GNorm = 0.4600, lr_0 = 9.3819e-04
Loss = 1.3558e-01, PNorm = 94.9791, GNorm = 0.5648, lr_0 = 9.3755e-04
Loss = 1.4291e-01, PNorm = 95.1085, GNorm = 0.6615, lr_0 = 9.3691e-04
Loss = 1.3759e-01, PNorm = 95.2409, GNorm = 0.6925, lr_0 = 9.3627e-04
Loss = 1.3877e-01, PNorm = 95.3654, GNorm = 0.9771, lr_0 = 9.3562e-04
Loss = 1.3429e-01, PNorm = 95.4919, GNorm = 0.5269, lr_0 = 9.3498e-04
Loss = 1.4393e-01, PNorm = 95.6127, GNorm = 0.8756, lr_0 = 9.3434e-04
Loss = 1.2700e-01, PNorm = 95.7389, GNorm = 0.4363, lr_0 = 9.3370e-04
Loss = 1.4312e-01, PNorm = 95.8558, GNorm = 0.8088, lr_0 = 9.3306e-04
Loss = 1.4523e-01, PNorm = 95.9701, GNorm = 1.1635, lr_0 = 9.3242e-04
Loss = 1.1943e-01, PNorm = 96.0883, GNorm = 0.6615, lr_0 = 9.3178e-04
Loss = 1.4666e-01, PNorm = 96.2011, GNorm = 0.5528, lr_0 = 9.3115e-04
Loss = 1.3659e-01, PNorm = 96.3182, GNorm = 0.7053, lr_0 = 9.3051e-04
Loss = 1.4100e-01, PNorm = 96.4282, GNorm = 0.6909, lr_0 = 9.2987e-04
Loss = 1.3961e-01, PNorm = 96.5565, GNorm = 0.8590, lr_0 = 9.2923e-04
Loss = 1.6155e-01, PNorm = 96.6813, GNorm = 1.2285, lr_0 = 9.2860e-04
Loss = 1.4752e-01, PNorm = 96.8104, GNorm = 0.7737, lr_0 = 9.2796e-04
Loss = 1.5440e-01, PNorm = 96.9271, GNorm = 0.5150, lr_0 = 9.2733e-04
Loss = 1.4017e-01, PNorm = 97.0473, GNorm = 0.4847, lr_0 = 9.2669e-04
Loss = 1.6040e-01, PNorm = 97.1738, GNorm = 1.4258, lr_0 = 9.2606e-04
Loss = 1.4708e-01, PNorm = 97.3125, GNorm = 1.0834, lr_0 = 9.2542e-04
Loss = 1.6413e-01, PNorm = 97.4387, GNorm = 0.8392, lr_0 = 9.2479e-04
Loss = 1.4614e-01, PNorm = 97.5842, GNorm = 0.5105, lr_0 = 9.2415e-04
Loss = 1.6793e-01, PNorm = 97.7359, GNorm = 0.6200, lr_0 = 9.2352e-04
Loss = 1.5000e-01, PNorm = 97.8784, GNorm = 0.6082, lr_0 = 9.2289e-04
Loss = 1.4196e-01, PNorm = 98.0094, GNorm = 0.6775, lr_0 = 9.2226e-04
Loss = 1.4369e-01, PNorm = 98.1425, GNorm = 0.6381, lr_0 = 9.2162e-04
Loss = 1.5109e-01, PNorm = 98.2723, GNorm = 1.1874, lr_0 = 9.2099e-04
Validation mae = 0.298207
Epoch 3
Loss = 8.7175e-02, PNorm = 98.3943, GNorm = 0.5414, lr_0 = 9.2036e-04
Loss = 7.4348e-02, PNorm = 98.4827, GNorm = 0.5810, lr_0 = 9.1973e-04
Loss = 8.2962e-02, PNorm = 98.5571, GNorm = 0.7259, lr_0 = 9.1910e-04
Loss = 7.8448e-02, PNorm = 98.6313, GNorm = 0.7546, lr_0 = 9.1847e-04
Loss = 7.7820e-02, PNorm = 98.7035, GNorm = 0.5892, lr_0 = 9.1784e-04
Loss = 7.5530e-02, PNorm = 98.7808, GNorm = 0.4304, lr_0 = 9.1721e-04
Loss = 8.4492e-02, PNorm = 98.8546, GNorm = 0.6614, lr_0 = 9.1658e-04
Loss = 7.5595e-02, PNorm = 98.9361, GNorm = 0.3679, lr_0 = 9.1596e-04
Loss = 8.0515e-02, PNorm = 99.0125, GNorm = 0.4670, lr_0 = 9.1533e-04
Loss = 7.3429e-02, PNorm = 99.0831, GNorm = 0.3698, lr_0 = 9.1470e-04
Loss = 8.2656e-02, PNorm = 99.1576, GNorm = 0.2745, lr_0 = 9.1408e-04
Loss = 7.4998e-02, PNorm = 99.2372, GNorm = 0.5887, lr_0 = 9.1345e-04
Loss = 7.6331e-02, PNorm = 99.3065, GNorm = 0.5134, lr_0 = 9.1282e-04
Loss = 8.1355e-02, PNorm = 99.3818, GNorm = 0.7034, lr_0 = 9.1220e-04
Loss = 7.2166e-02, PNorm = 99.4572, GNorm = 0.4711, lr_0 = 9.1157e-04
Loss = 8.5757e-02, PNorm = 99.5467, GNorm = 0.5785, lr_0 = 9.1095e-04
Loss = 7.6007e-02, PNorm = 99.6353, GNorm = 0.7177, lr_0 = 9.1032e-04
Loss = 6.9457e-02, PNorm = 99.7121, GNorm = 0.4943, lr_0 = 9.0970e-04
Loss = 6.9058e-02, PNorm = 99.7902, GNorm = 0.4555, lr_0 = 9.0908e-04
Loss = 7.5077e-02, PNorm = 99.8609, GNorm = 0.5044, lr_0 = 9.0846e-04
Loss = 8.3076e-02, PNorm = 99.9399, GNorm = 0.5865, lr_0 = 9.0783e-04
Loss = 7.7547e-02, PNorm = 100.0263, GNorm = 0.5816, lr_0 = 9.0721e-04
Loss = 7.9656e-02, PNorm = 100.1129, GNorm = 0.3682, lr_0 = 9.0659e-04
Loss = 8.1737e-02, PNorm = 100.2012, GNorm = 0.7717, lr_0 = 9.0597e-04
Loss = 7.0719e-02, PNorm = 100.2829, GNorm = 0.5569, lr_0 = 9.0535e-04
Loss = 8.2822e-02, PNorm = 100.3584, GNorm = 0.5502, lr_0 = 9.0473e-04
Loss = 7.5661e-02, PNorm = 100.4457, GNorm = 0.6624, lr_0 = 9.0411e-04
Loss = 7.6501e-02, PNorm = 100.5237, GNorm = 0.7940, lr_0 = 9.0349e-04
Loss = 8.0952e-02, PNorm = 100.6040, GNorm = 0.6952, lr_0 = 9.0287e-04
Loss = 9.3892e-02, PNorm = 100.6742, GNorm = 0.6729, lr_0 = 9.0225e-04
Loss = 9.7726e-02, PNorm = 100.7741, GNorm = 0.4229, lr_0 = 9.0163e-04
Loss = 8.1908e-02, PNorm = 100.8696, GNorm = 0.6202, lr_0 = 9.0102e-04
Loss = 8.1597e-02, PNorm = 100.9646, GNorm = 0.5116, lr_0 = 9.0040e-04
Loss = 7.8207e-02, PNorm = 101.0455, GNorm = 0.8591, lr_0 = 8.9978e-04
Loss = 8.0754e-02, PNorm = 101.1309, GNorm = 0.4163, lr_0 = 8.9916e-04
Loss = 7.7635e-02, PNorm = 101.2106, GNorm = 0.4683, lr_0 = 8.9855e-04
Loss = 8.3287e-02, PNorm = 101.2884, GNorm = 1.3858, lr_0 = 8.9793e-04
Loss = 8.4837e-02, PNorm = 101.3611, GNorm = 0.7740, lr_0 = 8.9732e-04
Loss = 7.4227e-02, PNorm = 101.4518, GNorm = 0.5530, lr_0 = 8.9670e-04
Loss = 8.1126e-02, PNorm = 101.5310, GNorm = 0.4397, lr_0 = 8.9609e-04
Loss = 7.4196e-02, PNorm = 101.6155, GNorm = 1.1398, lr_0 = 8.9548e-04
Loss = 8.3349e-02, PNorm = 101.6928, GNorm = 0.4101, lr_0 = 8.9486e-04
Loss = 9.0345e-02, PNorm = 101.7830, GNorm = 0.4279, lr_0 = 8.9425e-04
Loss = 7.2826e-02, PNorm = 101.8716, GNorm = 0.7484, lr_0 = 8.9364e-04
Loss = 9.3526e-02, PNorm = 101.9622, GNorm = 0.4409, lr_0 = 8.9302e-04
Loss = 7.8760e-02, PNorm = 102.0426, GNorm = 0.6196, lr_0 = 8.9241e-04
Loss = 8.4004e-02, PNorm = 102.1366, GNorm = 0.6820, lr_0 = 8.9180e-04
Loss = 8.1772e-02, PNorm = 102.2226, GNorm = 0.6585, lr_0 = 8.9119e-04
Loss = 8.6781e-02, PNorm = 102.3211, GNorm = 0.6370, lr_0 = 8.9058e-04
Loss = 8.7832e-02, PNorm = 102.4118, GNorm = 0.3564, lr_0 = 8.8997e-04
Loss = 7.8677e-02, PNorm = 102.5051, GNorm = 0.8669, lr_0 = 8.8936e-04
Loss = 7.6609e-02, PNorm = 102.5834, GNorm = 0.4490, lr_0 = 8.8875e-04
Loss = 7.7075e-02, PNorm = 102.6708, GNorm = 0.5661, lr_0 = 8.8814e-04
Loss = 8.1094e-02, PNorm = 102.7545, GNorm = 0.4241, lr_0 = 8.8753e-04
Loss = 8.0734e-02, PNorm = 102.8540, GNorm = 0.3602, lr_0 = 8.8693e-04
Loss = 8.3519e-02, PNorm = 102.9519, GNorm = 0.9095, lr_0 = 8.8632e-04
Loss = 8.1352e-02, PNorm = 103.0462, GNorm = 0.4568, lr_0 = 8.8571e-04
Loss = 8.3429e-02, PNorm = 103.1438, GNorm = 0.6071, lr_0 = 8.8510e-04
Loss = 8.3653e-02, PNorm = 103.2271, GNorm = 0.5456, lr_0 = 8.8450e-04
Loss = 9.3828e-02, PNorm = 103.3316, GNorm = 1.0642, lr_0 = 8.8389e-04
Loss = 8.8056e-02, PNorm = 103.4315, GNorm = 0.7172, lr_0 = 8.8329e-04
Loss = 8.2859e-02, PNorm = 103.5357, GNorm = 0.9186, lr_0 = 8.8268e-04
Loss = 8.1687e-02, PNorm = 103.6293, GNorm = 0.8784, lr_0 = 8.8208e-04
Loss = 8.2556e-02, PNorm = 103.7309, GNorm = 0.7622, lr_0 = 8.8147e-04
Loss = 9.1353e-02, PNorm = 103.8282, GNorm = 0.4150, lr_0 = 8.8087e-04
Loss = 1.0182e-01, PNorm = 103.9297, GNorm = 1.2205, lr_0 = 8.8026e-04
Loss = 8.6647e-02, PNorm = 104.0491, GNorm = 0.4346, lr_0 = 8.7966e-04
Loss = 9.1083e-02, PNorm = 104.1419, GNorm = 0.7868, lr_0 = 8.7906e-04
Loss = 1.0491e-01, PNorm = 104.2409, GNorm = 0.4111, lr_0 = 8.7846e-04
Loss = 9.4601e-02, PNorm = 104.3478, GNorm = 0.5877, lr_0 = 8.7785e-04
Loss = 1.0530e-01, PNorm = 104.4618, GNorm = 0.9200, lr_0 = 8.7725e-04
Loss = 8.7708e-02, PNorm = 104.5771, GNorm = 0.7921, lr_0 = 8.7665e-04
Loss = 8.7427e-02, PNorm = 104.6803, GNorm = 0.5447, lr_0 = 8.7605e-04
Loss = 8.9539e-02, PNorm = 104.7906, GNorm = 0.8748, lr_0 = 8.7545e-04
Loss = 9.6175e-02, PNorm = 104.8899, GNorm = 0.6659, lr_0 = 8.7485e-04
Loss = 8.8164e-02, PNorm = 104.9983, GNorm = 0.4522, lr_0 = 8.7425e-04
Loss = 8.4311e-02, PNorm = 105.1041, GNorm = 0.5749, lr_0 = 8.7365e-04
Loss = 8.2559e-02, PNorm = 105.1951, GNorm = 0.6855, lr_0 = 8.7306e-04
Loss = 7.5473e-02, PNorm = 105.2914, GNorm = 0.6922, lr_0 = 8.7246e-04
Loss = 8.7081e-02, PNorm = 105.3784, GNorm = 0.6973, lr_0 = 8.7186e-04
Loss = 1.0054e-01, PNorm = 105.4774, GNorm = 0.5024, lr_0 = 8.7126e-04
Loss = 1.0748e-01, PNorm = 105.5698, GNorm = 0.8793, lr_0 = 8.7067e-04
Loss = 1.0261e-01, PNorm = 105.6865, GNorm = 0.8479, lr_0 = 8.7007e-04
Loss = 8.1116e-02, PNorm = 105.7995, GNorm = 0.4813, lr_0 = 8.6947e-04
Loss = 9.7639e-02, PNorm = 105.8976, GNorm = 0.8738, lr_0 = 8.6888e-04
Loss = 8.1870e-02, PNorm = 106.0021, GNorm = 0.9910, lr_0 = 8.6828e-04
Loss = 9.9432e-02, PNorm = 106.1048, GNorm = 1.3344, lr_0 = 8.6769e-04
Loss = 9.4270e-02, PNorm = 106.2267, GNorm = 0.8511, lr_0 = 8.6709e-04
Loss = 1.0054e-01, PNorm = 106.3430, GNorm = 0.5651, lr_0 = 8.6650e-04
Loss = 9.8423e-02, PNorm = 106.4546, GNorm = 0.9601, lr_0 = 8.6590e-04
Loss = 8.4882e-02, PNorm = 106.5575, GNorm = 0.4091, lr_0 = 8.6531e-04
Loss = 1.0100e-01, PNorm = 106.6622, GNorm = 0.7940, lr_0 = 8.6472e-04
Loss = 8.6019e-02, PNorm = 106.7713, GNorm = 0.7476, lr_0 = 8.6413e-04
Loss = 9.6623e-02, PNorm = 106.8768, GNorm = 0.3751, lr_0 = 8.6353e-04
Loss = 9.3643e-02, PNorm = 106.9848, GNorm = 0.5515, lr_0 = 8.6294e-04
Loss = 8.7347e-02, PNorm = 107.0902, GNorm = 0.4680, lr_0 = 8.6235e-04
Loss = 8.6780e-02, PNorm = 107.2036, GNorm = 0.7959, lr_0 = 8.6176e-04
Loss = 9.0142e-02, PNorm = 107.3061, GNorm = 0.5377, lr_0 = 8.6117e-04
Loss = 9.0566e-02, PNorm = 107.3990, GNorm = 0.4071, lr_0 = 8.6058e-04
Loss = 9.2846e-02, PNorm = 107.4974, GNorm = 0.8058, lr_0 = 8.5999e-04
Loss = 8.8599e-02, PNorm = 107.6061, GNorm = 0.7819, lr_0 = 8.5940e-04
Loss = 9.5863e-02, PNorm = 107.7178, GNorm = 0.8375, lr_0 = 8.5881e-04
Loss = 9.8437e-02, PNorm = 107.8395, GNorm = 0.6637, lr_0 = 8.5823e-04
Loss = 9.6114e-02, PNorm = 107.9504, GNorm = 0.6192, lr_0 = 8.5764e-04
Loss = 8.8591e-02, PNorm = 108.0682, GNorm = 0.4242, lr_0 = 8.5705e-04
Loss = 1.0079e-01, PNorm = 108.1804, GNorm = 0.5067, lr_0 = 8.5646e-04
Loss = 9.9758e-02, PNorm = 108.2925, GNorm = 1.0526, lr_0 = 8.5588e-04
Loss = 9.8030e-02, PNorm = 108.4119, GNorm = 0.5249, lr_0 = 8.5529e-04
Loss = 1.0769e-01, PNorm = 108.5255, GNorm = 0.6485, lr_0 = 8.5470e-04
Loss = 8.7303e-02, PNorm = 108.6434, GNorm = 0.6391, lr_0 = 8.5412e-04
Loss = 9.1409e-02, PNorm = 108.7621, GNorm = 0.7000, lr_0 = 8.5353e-04
Loss = 9.8824e-02, PNorm = 108.8712, GNorm = 1.1020, lr_0 = 8.5295e-04
Loss = 1.0376e-01, PNorm = 108.9943, GNorm = 1.0756, lr_0 = 8.5236e-04
Loss = 1.1128e-01, PNorm = 109.1126, GNorm = 1.1017, lr_0 = 8.5178e-04
Loss = 9.8077e-02, PNorm = 109.2178, GNorm = 0.5053, lr_0 = 8.5120e-04
Loss = 1.1099e-01, PNorm = 109.3395, GNorm = 1.5496, lr_0 = 8.5061e-04
Loss = 9.2328e-02, PNorm = 109.4384, GNorm = 1.2802, lr_0 = 8.5003e-04
Loss = 9.9663e-02, PNorm = 109.5604, GNorm = 0.6258, lr_0 = 8.4945e-04
Loss = 8.7948e-02, PNorm = 109.6671, GNorm = 0.4591, lr_0 = 8.4887e-04
Loss = 1.0566e-01, PNorm = 109.7688, GNorm = 0.7136, lr_0 = 8.4828e-04
Validation mae = 0.297482
Epoch 4
Loss = 6.2503e-02, PNorm = 109.8640, GNorm = 0.5816, lr_0 = 8.4770e-04
Loss = 5.3833e-02, PNorm = 109.9496, GNorm = 0.8250, lr_0 = 8.4712e-04
Loss = 5.8341e-02, PNorm = 110.0262, GNorm = 0.5619, lr_0 = 8.4654e-04
Loss = 6.6432e-02, PNorm = 110.0885, GNorm = 0.4878, lr_0 = 8.4596e-04
Loss = 5.7823e-02, PNorm = 110.1438, GNorm = 0.4757, lr_0 = 8.4538e-04
Loss = 5.8861e-02, PNorm = 110.2043, GNorm = 0.3924, lr_0 = 8.4480e-04
Loss = 5.5433e-02, PNorm = 110.2667, GNorm = 0.6023, lr_0 = 8.4423e-04
Loss = 5.6959e-02, PNorm = 110.3284, GNorm = 0.4382, lr_0 = 8.4365e-04
Loss = 5.3485e-02, PNorm = 110.3857, GNorm = 0.8569, lr_0 = 8.4307e-04
Loss = 5.7835e-02, PNorm = 110.4537, GNorm = 0.3956, lr_0 = 8.4249e-04
Loss = 5.2222e-02, PNorm = 110.5342, GNorm = 0.2962, lr_0 = 8.4191e-04
Loss = 6.3054e-02, PNorm = 110.6006, GNorm = 0.2939, lr_0 = 8.4134e-04
Loss = 5.1257e-02, PNorm = 110.6685, GNorm = 0.4898, lr_0 = 8.4076e-04
Loss = 5.7712e-02, PNorm = 110.7234, GNorm = 0.4439, lr_0 = 8.4019e-04
Loss = 6.0574e-02, PNorm = 110.7836, GNorm = 1.0119, lr_0 = 8.3961e-04
Loss = 4.8084e-02, PNorm = 110.8422, GNorm = 0.3835, lr_0 = 8.3903e-04
Loss = 5.2960e-02, PNorm = 110.9064, GNorm = 0.9884, lr_0 = 8.3846e-04
Loss = 5.4816e-02, PNorm = 110.9772, GNorm = 0.3409, lr_0 = 8.3789e-04
Loss = 5.2264e-02, PNorm = 111.0474, GNorm = 0.2728, lr_0 = 8.3731e-04
Loss = 5.8712e-02, PNorm = 111.1073, GNorm = 0.9896, lr_0 = 8.3674e-04
Loss = 5.7516e-02, PNorm = 111.1771, GNorm = 1.1897, lr_0 = 8.3616e-04
Loss = 5.0467e-02, PNorm = 111.2391, GNorm = 0.5134, lr_0 = 8.3559e-04
Loss = 6.1579e-02, PNorm = 111.3102, GNorm = 0.3802, lr_0 = 8.3502e-04
Loss = 6.0661e-02, PNorm = 111.3876, GNorm = 0.2967, lr_0 = 8.3445e-04
Loss = 4.6744e-02, PNorm = 111.4489, GNorm = 0.2568, lr_0 = 8.3388e-04
Loss = 6.0340e-02, PNorm = 111.5094, GNorm = 0.6936, lr_0 = 8.3330e-04
Loss = 6.5976e-02, PNorm = 111.5682, GNorm = 0.6761, lr_0 = 8.3273e-04
Loss = 5.7862e-02, PNorm = 111.6436, GNorm = 0.4230, lr_0 = 8.3216e-04
Loss = 5.1019e-02, PNorm = 111.7060, GNorm = 0.5161, lr_0 = 8.3159e-04
Loss = 5.9377e-02, PNorm = 111.7842, GNorm = 0.3930, lr_0 = 8.3102e-04
Loss = 5.6545e-02, PNorm = 111.8473, GNorm = 0.5734, lr_0 = 8.3045e-04
Loss = 6.1444e-02, PNorm = 111.9170, GNorm = 0.5923, lr_0 = 8.2988e-04
Loss = 5.4725e-02, PNorm = 111.9954, GNorm = 0.3251, lr_0 = 8.2932e-04
Loss = 6.6451e-02, PNorm = 112.0797, GNorm = 0.4283, lr_0 = 8.2875e-04
Loss = 6.6889e-02, PNorm = 112.1664, GNorm = 0.3317, lr_0 = 8.2818e-04
Loss = 5.8995e-02, PNorm = 112.2555, GNorm = 0.2921, lr_0 = 8.2761e-04
Loss = 5.3240e-02, PNorm = 112.3348, GNorm = 0.6406, lr_0 = 8.2705e-04
Loss = 5.6149e-02, PNorm = 112.4111, GNorm = 0.5063, lr_0 = 8.2648e-04
Loss = 5.4261e-02, PNorm = 112.4779, GNorm = 1.1128, lr_0 = 8.2591e-04
Loss = 4.9029e-02, PNorm = 112.5473, GNorm = 0.4055, lr_0 = 8.2535e-04
Loss = 6.2118e-02, PNorm = 112.6254, GNorm = 0.3461, lr_0 = 8.2478e-04
Loss = 5.5870e-02, PNorm = 112.7031, GNorm = 0.3780, lr_0 = 8.2422e-04
Loss = 5.2359e-02, PNorm = 112.7833, GNorm = 0.3148, lr_0 = 8.2365e-04
Loss = 5.0140e-02, PNorm = 112.8631, GNorm = 0.4998, lr_0 = 8.2309e-04
Loss = 6.0138e-02, PNorm = 112.9322, GNorm = 0.2947, lr_0 = 8.2252e-04
Loss = 5.2062e-02, PNorm = 113.0016, GNorm = 0.4209, lr_0 = 8.2196e-04
Loss = 5.3805e-02, PNorm = 113.0755, GNorm = 0.4742, lr_0 = 8.2140e-04
Loss = 5.6867e-02, PNorm = 113.1545, GNorm = 0.4057, lr_0 = 8.2084e-04
Loss = 5.8771e-02, PNorm = 113.2452, GNorm = 0.9928, lr_0 = 8.2027e-04
Loss = 5.6325e-02, PNorm = 113.3266, GNorm = 0.2844, lr_0 = 8.1971e-04
Loss = 5.5073e-02, PNorm = 113.3960, GNorm = 0.4792, lr_0 = 8.1915e-04
Loss = 5.7535e-02, PNorm = 113.4681, GNorm = 0.3619, lr_0 = 8.1859e-04
Loss = 6.2279e-02, PNorm = 113.5482, GNorm = 0.3729, lr_0 = 8.1803e-04
Loss = 5.3975e-02, PNorm = 113.6205, GNorm = 0.3909, lr_0 = 8.1747e-04
Loss = 6.9254e-02, PNorm = 113.6951, GNorm = 0.8291, lr_0 = 8.1691e-04
Loss = 6.2559e-02, PNorm = 113.7792, GNorm = 0.4665, lr_0 = 8.1635e-04
Loss = 5.5660e-02, PNorm = 113.8642, GNorm = 0.7959, lr_0 = 8.1579e-04
Loss = 5.4806e-02, PNorm = 113.9427, GNorm = 0.3333, lr_0 = 8.1523e-04
Loss = 5.9721e-02, PNorm = 114.0173, GNorm = 0.3353, lr_0 = 8.1467e-04
Loss = 6.6579e-02, PNorm = 114.1036, GNorm = 0.4889, lr_0 = 8.1411e-04
Loss = 5.4533e-02, PNorm = 114.1891, GNorm = 0.8651, lr_0 = 8.1355e-04
Loss = 6.5972e-02, PNorm = 114.2679, GNorm = 1.7243, lr_0 = 8.1300e-04
Loss = 6.1765e-02, PNorm = 114.3658, GNorm = 0.9177, lr_0 = 8.1244e-04
Loss = 5.3037e-02, PNorm = 114.4413, GNorm = 0.5822, lr_0 = 8.1188e-04
Loss = 6.4617e-02, PNorm = 114.5216, GNorm = 0.5180, lr_0 = 8.1133e-04
Loss = 5.4665e-02, PNorm = 114.6085, GNorm = 0.6768, lr_0 = 8.1077e-04
Loss = 6.1396e-02, PNorm = 114.6867, GNorm = 0.4004, lr_0 = 8.1022e-04
Loss = 5.7957e-02, PNorm = 114.7736, GNorm = 0.9105, lr_0 = 8.0966e-04
Loss = 6.4277e-02, PNorm = 114.8468, GNorm = 0.5400, lr_0 = 8.0911e-04
Loss = 6.4626e-02, PNorm = 114.9346, GNorm = 0.4262, lr_0 = 8.0855e-04
Loss = 5.7797e-02, PNorm = 115.0128, GNorm = 0.2910, lr_0 = 8.0800e-04
Loss = 5.0867e-02, PNorm = 115.1050, GNorm = 0.5736, lr_0 = 8.0745e-04
Loss = 6.3949e-02, PNorm = 115.1801, GNorm = 0.4111, lr_0 = 8.0689e-04
Loss = 6.0383e-02, PNorm = 115.2666, GNorm = 0.7402, lr_0 = 8.0634e-04
Loss = 6.7021e-02, PNorm = 115.3447, GNorm = 0.3382, lr_0 = 8.0579e-04
Loss = 6.2352e-02, PNorm = 115.4333, GNorm = 0.3479, lr_0 = 8.0523e-04
Loss = 6.0900e-02, PNorm = 115.5316, GNorm = 0.3986, lr_0 = 8.0468e-04
Loss = 6.1252e-02, PNorm = 115.6035, GNorm = 0.4229, lr_0 = 8.0413e-04
Loss = 6.2339e-02, PNorm = 115.6943, GNorm = 0.7346, lr_0 = 8.0358e-04
Loss = 6.4186e-02, PNorm = 115.7751, GNorm = 0.2740, lr_0 = 8.0303e-04
Loss = 6.7315e-02, PNorm = 115.8608, GNorm = 0.5583, lr_0 = 8.0248e-04
Loss = 6.2218e-02, PNorm = 115.9426, GNorm = 0.3564, lr_0 = 8.0193e-04
Loss = 5.8655e-02, PNorm = 116.0326, GNorm = 0.5013, lr_0 = 8.0138e-04
Loss = 6.1131e-02, PNorm = 116.1324, GNorm = 0.5059, lr_0 = 8.0083e-04
Loss = 6.9964e-02, PNorm = 116.2212, GNorm = 1.0089, lr_0 = 8.0028e-04
Loss = 6.9103e-02, PNorm = 116.3191, GNorm = 1.0361, lr_0 = 7.9974e-04
Loss = 5.8246e-02, PNorm = 116.4073, GNorm = 0.5388, lr_0 = 7.9919e-04
Loss = 6.4740e-02, PNorm = 116.4842, GNorm = 0.5368, lr_0 = 7.9864e-04
Loss = 7.3140e-02, PNorm = 116.5711, GNorm = 0.4592, lr_0 = 7.9809e-04
Loss = 7.1636e-02, PNorm = 116.6715, GNorm = 0.6240, lr_0 = 7.9755e-04
Loss = 6.9093e-02, PNorm = 116.7591, GNorm = 0.5708, lr_0 = 7.9700e-04
Loss = 5.7929e-02, PNorm = 116.8492, GNorm = 0.5158, lr_0 = 7.9645e-04
Loss = 5.1547e-02, PNorm = 116.9439, GNorm = 0.4230, lr_0 = 7.9591e-04
Loss = 6.7344e-02, PNorm = 117.0283, GNorm = 0.7296, lr_0 = 7.9536e-04
Loss = 5.9353e-02, PNorm = 117.1208, GNorm = 0.5309, lr_0 = 7.9482e-04
Loss = 7.3755e-02, PNorm = 117.2071, GNorm = 0.4769, lr_0 = 7.9427e-04
Loss = 6.4080e-02, PNorm = 117.3097, GNorm = 0.5276, lr_0 = 7.9373e-04
Loss = 6.8426e-02, PNorm = 117.3997, GNorm = 0.3319, lr_0 = 7.9319e-04
Loss = 6.0189e-02, PNorm = 117.5024, GNorm = 0.6561, lr_0 = 7.9264e-04
Loss = 6.8100e-02, PNorm = 117.5933, GNorm = 0.4633, lr_0 = 7.9210e-04
Loss = 6.3348e-02, PNorm = 117.6965, GNorm = 0.3190, lr_0 = 7.9156e-04
Loss = 6.9601e-02, PNorm = 117.7927, GNorm = 0.4935, lr_0 = 7.9101e-04
Loss = 6.6828e-02, PNorm = 117.8915, GNorm = 0.4742, lr_0 = 7.9047e-04
Loss = 6.5184e-02, PNorm = 117.9899, GNorm = 0.6527, lr_0 = 7.8993e-04
Loss = 7.2009e-02, PNorm = 118.0726, GNorm = 0.3720, lr_0 = 7.8939e-04
Loss = 6.3390e-02, PNorm = 118.1678, GNorm = 0.8642, lr_0 = 7.8885e-04
Loss = 6.6036e-02, PNorm = 118.2632, GNorm = 0.5358, lr_0 = 7.8831e-04
Loss = 7.5488e-02, PNorm = 118.3631, GNorm = 0.6597, lr_0 = 7.8777e-04
Loss = 6.9553e-02, PNorm = 118.4588, GNorm = 0.3460, lr_0 = 7.8723e-04
Loss = 6.0295e-02, PNorm = 118.5487, GNorm = 0.4707, lr_0 = 7.8669e-04
Loss = 5.5517e-02, PNorm = 118.6374, GNorm = 0.3287, lr_0 = 7.8615e-04
Loss = 6.6980e-02, PNorm = 118.7360, GNorm = 0.4749, lr_0 = 7.8561e-04
Loss = 7.4785e-02, PNorm = 118.8284, GNorm = 0.3813, lr_0 = 7.8507e-04
Loss = 6.2296e-02, PNorm = 118.9260, GNorm = 0.7662, lr_0 = 7.8454e-04
Loss = 6.5827e-02, PNorm = 119.0290, GNorm = 0.3864, lr_0 = 7.8400e-04
Loss = 6.7599e-02, PNorm = 119.1128, GNorm = 1.1124, lr_0 = 7.8346e-04
Loss = 6.5968e-02, PNorm = 119.2108, GNorm = 0.7797, lr_0 = 7.8293e-04
Loss = 7.0001e-02, PNorm = 119.2987, GNorm = 0.7083, lr_0 = 7.8239e-04
Loss = 6.3262e-02, PNorm = 119.3955, GNorm = 0.4186, lr_0 = 7.8185e-04
Loss = 6.1026e-02, PNorm = 119.4943, GNorm = 0.3421, lr_0 = 7.8132e-04
Validation mae = 0.289758
Epoch 5
Loss = 4.2661e-02, PNorm = 119.5753, GNorm = 0.4321, lr_0 = 7.8078e-04
Loss = 5.1630e-02, PNorm = 119.6458, GNorm = 0.5734, lr_0 = 7.8025e-04
Loss = 4.1546e-02, PNorm = 119.7039, GNorm = 0.5821, lr_0 = 7.7971e-04
Loss = 4.8157e-02, PNorm = 119.7660, GNorm = 0.3181, lr_0 = 7.7918e-04
Loss = 4.8306e-02, PNorm = 119.8267, GNorm = 0.2613, lr_0 = 7.7864e-04
Loss = 4.1840e-02, PNorm = 119.8883, GNorm = 0.6154, lr_0 = 7.7811e-04
Loss = 4.0297e-02, PNorm = 119.9386, GNorm = 0.4193, lr_0 = 7.7758e-04
Loss = 4.1572e-02, PNorm = 119.9895, GNorm = 0.4516, lr_0 = 7.7705e-04
Loss = 4.2637e-02, PNorm = 120.0379, GNorm = 0.4689, lr_0 = 7.7651e-04
Loss = 3.7215e-02, PNorm = 120.1020, GNorm = 0.3463, lr_0 = 7.7598e-04
Loss = 4.6320e-02, PNorm = 120.1502, GNorm = 0.3293, lr_0 = 7.7545e-04
Loss = 4.8071e-02, PNorm = 120.2129, GNorm = 0.6999, lr_0 = 7.7492e-04
Loss = 4.2253e-02, PNorm = 120.2710, GNorm = 0.5138, lr_0 = 7.7439e-04
Loss = 3.9674e-02, PNorm = 120.3345, GNorm = 1.0169, lr_0 = 7.7386e-04
Loss = 3.6855e-02, PNorm = 120.3832, GNorm = 0.2432, lr_0 = 7.7333e-04
Loss = 4.8275e-02, PNorm = 120.4348, GNorm = 0.3785, lr_0 = 7.7280e-04
Loss = 3.9866e-02, PNorm = 120.4890, GNorm = 0.6041, lr_0 = 7.7227e-04
Loss = 4.1877e-02, PNorm = 120.5502, GNorm = 1.0477, lr_0 = 7.7174e-04
Loss = 4.4794e-02, PNorm = 120.6063, GNorm = 0.3425, lr_0 = 7.7121e-04
Loss = 3.9361e-02, PNorm = 120.6718, GNorm = 0.5836, lr_0 = 7.7068e-04
Loss = 3.3223e-02, PNorm = 120.7243, GNorm = 0.7135, lr_0 = 7.7015e-04
Loss = 3.8740e-02, PNorm = 120.7820, GNorm = 0.4011, lr_0 = 7.6963e-04
Loss = 4.1964e-02, PNorm = 120.8329, GNorm = 0.4867, lr_0 = 7.6910e-04
Loss = 4.1220e-02, PNorm = 120.8932, GNorm = 0.5251, lr_0 = 7.6857e-04
Loss = 3.6766e-02, PNorm = 120.9530, GNorm = 0.2753, lr_0 = 7.6805e-04
Loss = 4.2706e-02, PNorm = 121.0119, GNorm = 0.8498, lr_0 = 7.6752e-04
Loss = 3.7966e-02, PNorm = 121.0748, GNorm = 0.3482, lr_0 = 7.6699e-04
Loss = 4.2855e-02, PNorm = 121.1298, GNorm = 0.5803, lr_0 = 7.6647e-04
Loss = 4.7645e-02, PNorm = 121.1877, GNorm = 0.6210, lr_0 = 7.6594e-04
Loss = 4.0869e-02, PNorm = 121.2499, GNorm = 0.2114, lr_0 = 7.6542e-04
Loss = 3.9745e-02, PNorm = 121.3064, GNorm = 0.3296, lr_0 = 7.6489e-04
Loss = 4.2393e-02, PNorm = 121.3623, GNorm = 0.5716, lr_0 = 7.6437e-04
Loss = 3.6118e-02, PNorm = 121.4250, GNorm = 0.6918, lr_0 = 7.6385e-04
Loss = 4.6015e-02, PNorm = 121.4906, GNorm = 0.3553, lr_0 = 7.6332e-04
Loss = 4.2328e-02, PNorm = 121.5552, GNorm = 0.2282, lr_0 = 7.6280e-04
Loss = 4.0078e-02, PNorm = 121.6151, GNorm = 0.3073, lr_0 = 7.6228e-04
Loss = 4.2121e-02, PNorm = 121.6799, GNorm = 0.2529, lr_0 = 7.6176e-04
Loss = 3.6151e-02, PNorm = 121.7463, GNorm = 0.3922, lr_0 = 7.6123e-04
Loss = 4.3583e-02, PNorm = 121.8074, GNorm = 0.4396, lr_0 = 7.6071e-04
Loss = 4.2500e-02, PNorm = 121.8669, GNorm = 0.2583, lr_0 = 7.6019e-04
Loss = 5.2227e-02, PNorm = 121.9330, GNorm = 0.8170, lr_0 = 7.5967e-04
Loss = 4.7524e-02, PNorm = 122.0083, GNorm = 0.4232, lr_0 = 7.5915e-04
Loss = 3.9596e-02, PNorm = 122.0796, GNorm = 0.4048, lr_0 = 7.5863e-04
Loss = 3.9131e-02, PNorm = 122.1424, GNorm = 0.2784, lr_0 = 7.5811e-04
Loss = 4.3662e-02, PNorm = 122.1956, GNorm = 0.5879, lr_0 = 7.5759e-04
Loss = 4.7498e-02, PNorm = 122.2691, GNorm = 0.7560, lr_0 = 7.5707e-04
Loss = 3.9192e-02, PNorm = 122.3382, GNorm = 0.5249, lr_0 = 7.5655e-04
Loss = 3.5663e-02, PNorm = 122.4065, GNorm = 0.3327, lr_0 = 7.5603e-04
Loss = 3.9189e-02, PNorm = 122.4646, GNorm = 0.4126, lr_0 = 7.5552e-04
Loss = 4.4687e-02, PNorm = 122.5219, GNorm = 0.3144, lr_0 = 7.5500e-04
Loss = 3.9747e-02, PNorm = 122.5802, GNorm = 0.6354, lr_0 = 7.5448e-04
Loss = 4.3244e-02, PNorm = 122.6453, GNorm = 1.2313, lr_0 = 7.5397e-04
Loss = 3.8561e-02, PNorm = 122.7138, GNorm = 0.3173, lr_0 = 7.5345e-04
Loss = 4.0145e-02, PNorm = 122.7824, GNorm = 1.0342, lr_0 = 7.5293e-04
Loss = 4.7412e-02, PNorm = 122.8531, GNorm = 0.2794, lr_0 = 7.5242e-04
Loss = 3.9779e-02, PNorm = 122.9199, GNorm = 0.6039, lr_0 = 7.5190e-04
Loss = 4.1689e-02, PNorm = 122.9868, GNorm = 0.5215, lr_0 = 7.5139e-04
Loss = 4.4124e-02, PNorm = 123.0579, GNorm = 0.6169, lr_0 = 7.5087e-04
Loss = 4.7474e-02, PNorm = 123.1256, GNorm = 0.7725, lr_0 = 7.5036e-04
Loss = 3.9302e-02, PNorm = 123.1956, GNorm = 0.2879, lr_0 = 7.4984e-04
Loss = 4.2906e-02, PNorm = 123.2669, GNorm = 0.3635, lr_0 = 7.4933e-04
Loss = 4.0536e-02, PNorm = 123.3358, GNorm = 0.3249, lr_0 = 7.4882e-04
Loss = 4.4716e-02, PNorm = 123.4043, GNorm = 0.4388, lr_0 = 7.4830e-04
Loss = 4.3022e-02, PNorm = 123.4826, GNorm = 0.9852, lr_0 = 7.4779e-04
Loss = 3.7603e-02, PNorm = 123.5570, GNorm = 0.5700, lr_0 = 7.4728e-04
Loss = 4.9623e-02, PNorm = 123.6426, GNorm = 0.9989, lr_0 = 7.4677e-04
Loss = 4.1213e-02, PNorm = 123.7265, GNorm = 0.6682, lr_0 = 7.4625e-04
Loss = 4.1193e-02, PNorm = 123.8053, GNorm = 0.3554, lr_0 = 7.4574e-04
Loss = 4.1927e-02, PNorm = 123.8787, GNorm = 0.7833, lr_0 = 7.4523e-04
Loss = 4.4667e-02, PNorm = 123.9448, GNorm = 0.8593, lr_0 = 7.4472e-04
Loss = 4.3503e-02, PNorm = 124.0227, GNorm = 0.6078, lr_0 = 7.4421e-04
Loss = 4.4927e-02, PNorm = 124.0974, GNorm = 0.4424, lr_0 = 7.4370e-04
Loss = 4.0168e-02, PNorm = 124.1781, GNorm = 0.4255, lr_0 = 7.4319e-04
Loss = 3.7712e-02, PNorm = 124.2512, GNorm = 0.2202, lr_0 = 7.4268e-04
Loss = 4.3844e-02, PNorm = 124.3188, GNorm = 0.6953, lr_0 = 7.4217e-04
Loss = 4.1371e-02, PNorm = 124.3844, GNorm = 0.2731, lr_0 = 7.4167e-04
Loss = 4.0676e-02, PNorm = 124.4564, GNorm = 0.2403, lr_0 = 7.4116e-04
Loss = 4.1117e-02, PNorm = 124.5308, GNorm = 0.4602, lr_0 = 7.4065e-04
Loss = 4.2149e-02, PNorm = 124.6078, GNorm = 0.4651, lr_0 = 7.4014e-04
Loss = 4.2802e-02, PNorm = 124.6769, GNorm = 0.4344, lr_0 = 7.3964e-04
Loss = 4.8864e-02, PNorm = 124.7664, GNorm = 0.6155, lr_0 = 7.3913e-04
Loss = 3.8403e-02, PNorm = 124.8466, GNorm = 0.2968, lr_0 = 7.3862e-04
Loss = 5.1233e-02, PNorm = 124.9139, GNorm = 0.3567, lr_0 = 7.3812e-04
Loss = 4.6583e-02, PNorm = 124.9918, GNorm = 0.5983, lr_0 = 7.3761e-04
Loss = 4.5481e-02, PNorm = 125.0669, GNorm = 0.8226, lr_0 = 7.3711e-04
Loss = 4.4097e-02, PNorm = 125.1465, GNorm = 1.2754, lr_0 = 7.3660e-04
Loss = 4.7197e-02, PNorm = 125.2187, GNorm = 0.3679, lr_0 = 7.3610e-04
Loss = 4.7237e-02, PNorm = 125.2880, GNorm = 0.4202, lr_0 = 7.3559e-04
Loss = 4.3356e-02, PNorm = 125.3665, GNorm = 0.8057, lr_0 = 7.3509e-04
Loss = 4.3768e-02, PNorm = 125.4405, GNorm = 0.6347, lr_0 = 7.3458e-04
Loss = 5.2389e-02, PNorm = 125.5129, GNorm = 0.3655, lr_0 = 7.3408e-04
Loss = 4.0369e-02, PNorm = 125.5816, GNorm = 0.4987, lr_0 = 7.3358e-04
Loss = 4.2241e-02, PNorm = 125.6588, GNorm = 0.4487, lr_0 = 7.3308e-04
Loss = 4.7934e-02, PNorm = 125.7292, GNorm = 0.5799, lr_0 = 7.3257e-04
Loss = 5.0863e-02, PNorm = 125.8058, GNorm = 0.4063, lr_0 = 7.3207e-04
Loss = 4.4424e-02, PNorm = 125.8785, GNorm = 0.3961, lr_0 = 7.3157e-04
Loss = 4.2111e-02, PNorm = 125.9548, GNorm = 0.4473, lr_0 = 7.3107e-04
Loss = 4.8512e-02, PNorm = 126.0253, GNorm = 0.5523, lr_0 = 7.3057e-04
Loss = 4.6637e-02, PNorm = 126.1075, GNorm = 0.9534, lr_0 = 7.3007e-04
Loss = 4.3318e-02, PNorm = 126.1902, GNorm = 0.9850, lr_0 = 7.2957e-04
Loss = 4.6974e-02, PNorm = 126.2727, GNorm = 0.4406, lr_0 = 7.2907e-04
Loss = 4.5622e-02, PNorm = 126.3527, GNorm = 0.2730, lr_0 = 7.2857e-04
Loss = 4.7987e-02, PNorm = 126.4329, GNorm = 0.7873, lr_0 = 7.2807e-04
Loss = 4.8086e-02, PNorm = 126.5087, GNorm = 0.4187, lr_0 = 7.2757e-04
Loss = 4.9498e-02, PNorm = 126.5876, GNorm = 0.3329, lr_0 = 7.2707e-04
Loss = 4.9183e-02, PNorm = 126.6700, GNorm = 0.6238, lr_0 = 7.2657e-04
Loss = 5.2595e-02, PNorm = 126.7568, GNorm = 0.4210, lr_0 = 7.2608e-04
Loss = 4.7915e-02, PNorm = 126.8441, GNorm = 0.5016, lr_0 = 7.2558e-04
Loss = 5.0740e-02, PNorm = 126.9297, GNorm = 0.4810, lr_0 = 7.2508e-04
Loss = 5.2527e-02, PNorm = 127.0169, GNorm = 0.7128, lr_0 = 7.2458e-04
Loss = 5.1312e-02, PNorm = 127.1040, GNorm = 0.4837, lr_0 = 7.2409e-04
Loss = 4.2085e-02, PNorm = 127.1882, GNorm = 0.4501, lr_0 = 7.2359e-04
Loss = 4.5495e-02, PNorm = 127.2695, GNorm = 0.6636, lr_0 = 7.2310e-04
Loss = 4.4589e-02, PNorm = 127.3513, GNorm = 0.7018, lr_0 = 7.2260e-04
Loss = 5.5952e-02, PNorm = 127.4350, GNorm = 0.3862, lr_0 = 7.2211e-04
Loss = 4.7732e-02, PNorm = 127.5256, GNorm = 0.7628, lr_0 = 7.2161e-04
Loss = 5.0234e-02, PNorm = 127.6239, GNorm = 0.2512, lr_0 = 7.2112e-04
Loss = 5.2947e-02, PNorm = 127.7121, GNorm = 0.6920, lr_0 = 7.2062e-04
Loss = 4.6938e-02, PNorm = 127.8028, GNorm = 0.3098, lr_0 = 7.2013e-04
Loss = 4.2930e-02, PNorm = 127.8826, GNorm = 0.6829, lr_0 = 7.1964e-04
Validation mae = 0.289385
Epoch 6
Loss = 3.5535e-02, PNorm = 127.9501, GNorm = 0.2238, lr_0 = 7.1914e-04
Loss = 3.5003e-02, PNorm = 128.0101, GNorm = 0.3080, lr_0 = 7.1865e-04
Loss = 3.1971e-02, PNorm = 128.0622, GNorm = 0.2849, lr_0 = 7.1816e-04
Loss = 3.8057e-02, PNorm = 128.1141, GNorm = 0.8715, lr_0 = 7.1767e-04
Loss = 3.4057e-02, PNorm = 128.1624, GNorm = 0.4130, lr_0 = 7.1717e-04
Loss = 3.1282e-02, PNorm = 128.2102, GNorm = 0.4910, lr_0 = 7.1668e-04
Loss = 3.2994e-02, PNorm = 128.2592, GNorm = 0.5657, lr_0 = 7.1619e-04
Loss = 3.1251e-02, PNorm = 128.3087, GNorm = 0.2462, lr_0 = 7.1570e-04
Loss = 3.2200e-02, PNorm = 128.3518, GNorm = 0.4423, lr_0 = 7.1521e-04
Loss = 3.4583e-02, PNorm = 128.3995, GNorm = 0.6618, lr_0 = 7.1472e-04
Loss = 2.9515e-02, PNorm = 128.4466, GNorm = 0.3472, lr_0 = 7.1423e-04
Loss = 3.2707e-02, PNorm = 128.4967, GNorm = 0.4631, lr_0 = 7.1374e-04
Loss = 3.3972e-02, PNorm = 128.5416, GNorm = 0.2516, lr_0 = 7.1325e-04
Loss = 3.5644e-02, PNorm = 128.5947, GNorm = 0.2851, lr_0 = 7.1277e-04
Loss = 3.1541e-02, PNorm = 128.6463, GNorm = 0.2146, lr_0 = 7.1228e-04
Loss = 3.3191e-02, PNorm = 128.7038, GNorm = 0.6266, lr_0 = 7.1179e-04
Loss = 3.8214e-02, PNorm = 128.7512, GNorm = 0.7506, lr_0 = 7.1130e-04
Loss = 3.2120e-02, PNorm = 128.8079, GNorm = 0.4063, lr_0 = 7.1081e-04
Loss = 4.0311e-02, PNorm = 128.8602, GNorm = 0.3699, lr_0 = 7.1033e-04
Loss = 3.7665e-02, PNorm = 128.9209, GNorm = 0.4719, lr_0 = 7.0984e-04
Loss = 3.4262e-02, PNorm = 128.9755, GNorm = 0.2346, lr_0 = 7.0935e-04
Loss = 3.2586e-02, PNorm = 129.0283, GNorm = 0.9312, lr_0 = 7.0887e-04
Loss = 3.8767e-02, PNorm = 129.0856, GNorm = 0.8152, lr_0 = 7.0838e-04
Loss = 2.9875e-02, PNorm = 129.1374, GNorm = 0.3025, lr_0 = 7.0790e-04
Loss = 3.2646e-02, PNorm = 129.1902, GNorm = 0.5254, lr_0 = 7.0741e-04
Loss = 3.2044e-02, PNorm = 129.2358, GNorm = 0.2558, lr_0 = 7.0693e-04
Loss = 3.2299e-02, PNorm = 129.2935, GNorm = 0.1716, lr_0 = 7.0644e-04
Loss = 3.3499e-02, PNorm = 129.3473, GNorm = 0.2839, lr_0 = 7.0596e-04
Loss = 3.4658e-02, PNorm = 129.4042, GNorm = 0.4881, lr_0 = 7.0548e-04
Loss = 3.3454e-02, PNorm = 129.4547, GNorm = 0.4649, lr_0 = 7.0499e-04
Loss = 3.4484e-02, PNorm = 129.5082, GNorm = 0.5332, lr_0 = 7.0451e-04
Loss = 2.9485e-02, PNorm = 129.5593, GNorm = 0.2960, lr_0 = 7.0403e-04
Loss = 2.8839e-02, PNorm = 129.6145, GNorm = 0.2129, lr_0 = 7.0354e-04
Loss = 2.9783e-02, PNorm = 129.6625, GNorm = 0.4336, lr_0 = 7.0306e-04
Loss = 2.9824e-02, PNorm = 129.7201, GNorm = 0.5980, lr_0 = 7.0258e-04
Loss = 3.1651e-02, PNorm = 129.7713, GNorm = 0.7456, lr_0 = 7.0210e-04
Loss = 3.2936e-02, PNorm = 129.8259, GNorm = 0.4355, lr_0 = 7.0162e-04
Loss = 3.1295e-02, PNorm = 129.8769, GNorm = 0.4266, lr_0 = 7.0114e-04
Loss = 3.5502e-02, PNorm = 129.9237, GNorm = 0.3503, lr_0 = 7.0066e-04
Loss = 3.6537e-02, PNorm = 129.9793, GNorm = 0.3318, lr_0 = 7.0018e-04
Loss = 3.6368e-02, PNorm = 130.0401, GNorm = 0.4830, lr_0 = 6.9970e-04
Loss = 2.6440e-02, PNorm = 130.1045, GNorm = 0.4724, lr_0 = 6.9922e-04
Loss = 3.5713e-02, PNorm = 130.1607, GNorm = 0.5101, lr_0 = 6.9874e-04
Loss = 3.6714e-02, PNorm = 130.2212, GNorm = 0.8819, lr_0 = 6.9826e-04
Loss = 2.8570e-02, PNorm = 130.2784, GNorm = 0.1964, lr_0 = 6.9778e-04
Loss = 3.1041e-02, PNorm = 130.3368, GNorm = 0.2325, lr_0 = 6.9730e-04
Loss = 3.1641e-02, PNorm = 130.3890, GNorm = 0.4163, lr_0 = 6.9683e-04
Loss = 3.3975e-02, PNorm = 130.4520, GNorm = 0.6573, lr_0 = 6.9635e-04
Loss = 3.6784e-02, PNorm = 130.5090, GNorm = 0.5425, lr_0 = 6.9587e-04
Loss = 3.0202e-02, PNorm = 130.5718, GNorm = 0.4701, lr_0 = 6.9540e-04
Loss = 3.8956e-02, PNorm = 130.6307, GNorm = 1.0433, lr_0 = 6.9492e-04
Loss = 3.9010e-02, PNorm = 130.7013, GNorm = 0.8019, lr_0 = 6.9444e-04
Loss = 3.7196e-02, PNorm = 130.7727, GNorm = 0.4941, lr_0 = 6.9397e-04
Loss = 3.1004e-02, PNorm = 130.8356, GNorm = 0.5893, lr_0 = 6.9349e-04
Loss = 3.3718e-02, PNorm = 130.8918, GNorm = 0.2795, lr_0 = 6.9302e-04
Loss = 3.7172e-02, PNorm = 130.9562, GNorm = 0.3744, lr_0 = 6.9254e-04
Loss = 3.0065e-02, PNorm = 131.0217, GNorm = 0.2599, lr_0 = 6.9207e-04
Loss = 3.1278e-02, PNorm = 131.0705, GNorm = 0.2758, lr_0 = 6.9159e-04
Loss = 4.0192e-02, PNorm = 131.1304, GNorm = 0.6475, lr_0 = 6.9112e-04
Loss = 2.9694e-02, PNorm = 131.1937, GNorm = 0.5160, lr_0 = 6.9065e-04
Loss = 4.0836e-02, PNorm = 131.2507, GNorm = 0.4412, lr_0 = 6.9017e-04
Loss = 3.0845e-02, PNorm = 131.3085, GNorm = 0.2268, lr_0 = 6.8970e-04
Loss = 3.3380e-02, PNorm = 131.3730, GNorm = 0.3196, lr_0 = 6.8923e-04
Loss = 3.1791e-02, PNorm = 131.4345, GNorm = 0.4852, lr_0 = 6.8876e-04
Loss = 3.1631e-02, PNorm = 131.4995, GNorm = 0.4179, lr_0 = 6.8828e-04
Loss = 3.4793e-02, PNorm = 131.5611, GNorm = 0.2636, lr_0 = 6.8781e-04
Loss = 3.7965e-02, PNorm = 131.6265, GNorm = 0.5707, lr_0 = 6.8734e-04
Loss = 3.6758e-02, PNorm = 131.6880, GNorm = 0.7097, lr_0 = 6.8687e-04
Loss = 3.5989e-02, PNorm = 131.7525, GNorm = 0.2092, lr_0 = 6.8640e-04
Loss = 3.0151e-02, PNorm = 131.8099, GNorm = 0.5532, lr_0 = 6.8593e-04
Loss = 3.7889e-02, PNorm = 131.8731, GNorm = 0.2159, lr_0 = 6.8546e-04
Loss = 3.6174e-02, PNorm = 131.9396, GNorm = 0.3892, lr_0 = 6.8499e-04
Loss = 3.7479e-02, PNorm = 132.0054, GNorm = 0.5111, lr_0 = 6.8452e-04
Loss = 3.6753e-02, PNorm = 132.0752, GNorm = 0.6069, lr_0 = 6.8405e-04
Loss = 3.7334e-02, PNorm = 132.1450, GNorm = 0.3682, lr_0 = 6.8358e-04
Loss = 3.5747e-02, PNorm = 132.2191, GNorm = 0.4240, lr_0 = 6.8312e-04
Loss = 3.4297e-02, PNorm = 132.2908, GNorm = 0.3084, lr_0 = 6.8265e-04
Loss = 3.1733e-02, PNorm = 132.3574, GNorm = 0.4640, lr_0 = 6.8218e-04
Loss = 3.2543e-02, PNorm = 132.4206, GNorm = 0.4615, lr_0 = 6.8171e-04
Loss = 3.5854e-02, PNorm = 132.4834, GNorm = 0.3070, lr_0 = 6.8125e-04
Loss = 3.0096e-02, PNorm = 132.5468, GNorm = 0.4089, lr_0 = 6.8078e-04
Loss = 3.1140e-02, PNorm = 132.6006, GNorm = 0.6028, lr_0 = 6.8031e-04
Loss = 3.4213e-02, PNorm = 132.6598, GNorm = 0.3618, lr_0 = 6.7985e-04
Loss = 3.1501e-02, PNorm = 132.7141, GNorm = 0.5694, lr_0 = 6.7938e-04
Loss = 3.9963e-02, PNorm = 132.7803, GNorm = 0.3430, lr_0 = 6.7892e-04
Loss = 3.4502e-02, PNorm = 132.8422, GNorm = 0.8056, lr_0 = 6.7845e-04
Loss = 3.3360e-02, PNorm = 132.9062, GNorm = 0.2653, lr_0 = 6.7799e-04
Loss = 3.6515e-02, PNorm = 132.9676, GNorm = 0.7502, lr_0 = 6.7752e-04
Loss = 4.2571e-02, PNorm = 133.0375, GNorm = 0.3913, lr_0 = 6.7706e-04
Loss = 3.5051e-02, PNorm = 133.1018, GNorm = 0.4584, lr_0 = 6.7659e-04
Loss = 3.8242e-02, PNorm = 133.1798, GNorm = 0.4472, lr_0 = 6.7613e-04
Loss = 3.9610e-02, PNorm = 133.2522, GNorm = 0.6796, lr_0 = 6.7567e-04
Loss = 3.5262e-02, PNorm = 133.3267, GNorm = 0.3240, lr_0 = 6.7520e-04
Loss = 3.5880e-02, PNorm = 133.3989, GNorm = 0.2893, lr_0 = 6.7474e-04
Loss = 3.3677e-02, PNorm = 133.4716, GNorm = 0.3626, lr_0 = 6.7428e-04
Loss = 3.7742e-02, PNorm = 133.5456, GNorm = 0.3784, lr_0 = 6.7382e-04
Loss = 3.6421e-02, PNorm = 133.6153, GNorm = 0.3327, lr_0 = 6.7335e-04
Loss = 3.8062e-02, PNorm = 133.6889, GNorm = 0.2635, lr_0 = 6.7289e-04
Loss = 3.3893e-02, PNorm = 133.7556, GNorm = 0.3087, lr_0 = 6.7243e-04
Loss = 3.4763e-02, PNorm = 133.8194, GNorm = 0.5033, lr_0 = 6.7197e-04
Loss = 3.5144e-02, PNorm = 133.8883, GNorm = 0.3516, lr_0 = 6.7151e-04
Loss = 3.5833e-02, PNorm = 133.9553, GNorm = 0.3907, lr_0 = 6.7105e-04
Loss = 3.5769e-02, PNorm = 134.0242, GNorm = 0.5123, lr_0 = 6.7059e-04
Loss = 3.5295e-02, PNorm = 134.0867, GNorm = 0.5038, lr_0 = 6.7013e-04
Loss = 3.3446e-02, PNorm = 134.1567, GNorm = 0.5290, lr_0 = 6.6967e-04
Loss = 3.5335e-02, PNorm = 134.2278, GNorm = 0.6622, lr_0 = 6.6921e-04
Loss = 3.0027e-02, PNorm = 134.2954, GNorm = 0.7928, lr_0 = 6.6876e-04
Loss = 3.7320e-02, PNorm = 134.3574, GNorm = 0.7176, lr_0 = 6.6830e-04
Loss = 3.6488e-02, PNorm = 134.4228, GNorm = 0.3566, lr_0 = 6.6784e-04
Loss = 3.7055e-02, PNorm = 134.4860, GNorm = 0.6304, lr_0 = 6.6738e-04
Loss = 4.2312e-02, PNorm = 134.5561, GNorm = 0.3007, lr_0 = 6.6693e-04
Loss = 3.6747e-02, PNorm = 134.6241, GNorm = 1.6410, lr_0 = 6.6647e-04
Loss = 3.8057e-02, PNorm = 134.6921, GNorm = 0.3431, lr_0 = 6.6601e-04
Loss = 3.2111e-02, PNorm = 134.7634, GNorm = 0.2200, lr_0 = 6.6556e-04
Loss = 3.8269e-02, PNorm = 134.8291, GNorm = 0.4461, lr_0 = 6.6510e-04
Loss = 3.2052e-02, PNorm = 134.9029, GNorm = 0.2365, lr_0 = 6.6464e-04
Loss = 3.1661e-02, PNorm = 134.9734, GNorm = 0.5209, lr_0 = 6.6419e-04
Loss = 3.7987e-02, PNorm = 135.0345, GNorm = 0.4040, lr_0 = 6.6373e-04
Loss = 3.7231e-02, PNorm = 135.1081, GNorm = 0.3222, lr_0 = 6.6328e-04
Loss = 3.6360e-02, PNorm = 135.1805, GNorm = 0.5862, lr_0 = 6.6282e-04
Validation mae = 0.287230
Epoch 7
Loss = 2.9130e-02, PNorm = 135.2406, GNorm = 0.5915, lr_0 = 6.6237e-04
Loss = 2.9046e-02, PNorm = 135.2965, GNorm = 0.3855, lr_0 = 6.6192e-04
Loss = 3.1091e-02, PNorm = 135.3423, GNorm = 0.3018, lr_0 = 6.6146e-04
Loss = 3.3504e-02, PNorm = 135.3971, GNorm = 0.6351, lr_0 = 6.6101e-04
Loss = 2.7567e-02, PNorm = 135.4527, GNorm = 0.4597, lr_0 = 6.6056e-04
Loss = 2.5294e-02, PNorm = 135.5047, GNorm = 0.2083, lr_0 = 6.6011e-04
Loss = 2.8320e-02, PNorm = 135.5532, GNorm = 0.3635, lr_0 = 6.5965e-04
Loss = 2.7924e-02, PNorm = 135.5999, GNorm = 0.9017, lr_0 = 6.5920e-04
Loss = 3.0397e-02, PNorm = 135.6432, GNorm = 0.5115, lr_0 = 6.5875e-04
Loss = 2.1772e-02, PNorm = 135.6886, GNorm = 0.2396, lr_0 = 6.5830e-04
Loss = 2.5772e-02, PNorm = 135.7265, GNorm = 0.4005, lr_0 = 6.5785e-04
Loss = 2.4262e-02, PNorm = 135.7753, GNorm = 0.3451, lr_0 = 6.5740e-04
Loss = 2.6343e-02, PNorm = 135.8164, GNorm = 0.7914, lr_0 = 6.5695e-04
Loss = 2.8520e-02, PNorm = 135.8636, GNorm = 0.2447, lr_0 = 6.5650e-04
Loss = 2.9706e-02, PNorm = 135.9048, GNorm = 1.4787, lr_0 = 6.5605e-04
Loss = 2.8345e-02, PNorm = 135.9563, GNorm = 0.5650, lr_0 = 6.5560e-04
Loss = 2.8896e-02, PNorm = 136.0032, GNorm = 0.4559, lr_0 = 6.5515e-04
Loss = 2.9771e-02, PNorm = 136.0528, GNorm = 0.2901, lr_0 = 6.5470e-04
Loss = 3.0654e-02, PNorm = 136.0891, GNorm = 0.3934, lr_0 = 6.5425e-04
Loss = 2.8298e-02, PNorm = 136.1375, GNorm = 0.6956, lr_0 = 6.5380e-04
Loss = 2.8685e-02, PNorm = 136.1825, GNorm = 0.5179, lr_0 = 6.5335e-04
Loss = 2.4033e-02, PNorm = 136.2335, GNorm = 0.2598, lr_0 = 6.5291e-04
Loss = 2.6533e-02, PNorm = 136.2732, GNorm = 0.2877, lr_0 = 6.5246e-04
Loss = 2.5474e-02, PNorm = 136.3209, GNorm = 0.2577, lr_0 = 6.5201e-04
Loss = 2.5518e-02, PNorm = 136.3641, GNorm = 0.3846, lr_0 = 6.5157e-04
Loss = 2.5845e-02, PNorm = 136.4078, GNorm = 0.2079, lr_0 = 6.5112e-04
Loss = 2.4833e-02, PNorm = 136.4528, GNorm = 0.1969, lr_0 = 6.5067e-04
Loss = 2.6442e-02, PNorm = 136.5002, GNorm = 0.4124, lr_0 = 6.5023e-04
Loss = 2.4672e-02, PNorm = 136.5518, GNorm = 0.3357, lr_0 = 6.4978e-04
Loss = 2.5990e-02, PNorm = 136.6046, GNorm = 0.5543, lr_0 = 6.4934e-04
Loss = 2.8211e-02, PNorm = 136.6541, GNorm = 0.5799, lr_0 = 6.4889e-04
Loss = 2.7261e-02, PNorm = 136.7016, GNorm = 0.2781, lr_0 = 6.4845e-04
Loss = 3.2475e-02, PNorm = 136.7488, GNorm = 0.3941, lr_0 = 6.4800e-04
Loss = 2.0153e-02, PNorm = 136.7919, GNorm = 0.3123, lr_0 = 6.4756e-04
Loss = 3.3660e-02, PNorm = 136.8359, GNorm = 0.6794, lr_0 = 6.4712e-04
Loss = 2.5848e-02, PNorm = 136.8807, GNorm = 0.4089, lr_0 = 6.4667e-04
Loss = 2.8331e-02, PNorm = 136.9202, GNorm = 0.3962, lr_0 = 6.4623e-04
Loss = 2.5688e-02, PNorm = 136.9639, GNorm = 0.4506, lr_0 = 6.4579e-04
Loss = 2.5362e-02, PNorm = 137.0081, GNorm = 0.4390, lr_0 = 6.4534e-04
Loss = 3.0360e-02, PNorm = 137.0576, GNorm = 0.3741, lr_0 = 6.4490e-04
Loss = 2.9560e-02, PNorm = 137.1097, GNorm = 0.2109, lr_0 = 6.4446e-04
Loss = 2.6802e-02, PNorm = 137.1575, GNorm = 0.4602, lr_0 = 6.4402e-04
Loss = 2.5048e-02, PNorm = 137.2097, GNorm = 0.2581, lr_0 = 6.4358e-04
Loss = 2.1740e-02, PNorm = 137.2631, GNorm = 0.3917, lr_0 = 6.4314e-04
Loss = 2.2712e-02, PNorm = 137.3124, GNorm = 0.6149, lr_0 = 6.4270e-04
Loss = 2.6452e-02, PNorm = 137.3629, GNorm = 0.2315, lr_0 = 6.4226e-04
Loss = 2.8329e-02, PNorm = 137.4144, GNorm = 0.3351, lr_0 = 6.4182e-04
Loss = 2.4097e-02, PNorm = 137.4660, GNorm = 0.1744, lr_0 = 6.4138e-04
Loss = 2.6493e-02, PNorm = 137.5141, GNorm = 0.3991, lr_0 = 6.4094e-04
Loss = 2.5681e-02, PNorm = 137.5599, GNorm = 0.5159, lr_0 = 6.4050e-04
Loss = 2.6011e-02, PNorm = 137.6126, GNorm = 0.2850, lr_0 = 6.4006e-04
Loss = 2.3209e-02, PNorm = 137.6660, GNorm = 0.7757, lr_0 = 6.3962e-04
Loss = 2.7210e-02, PNorm = 137.7178, GNorm = 0.2088, lr_0 = 6.3918e-04
Loss = 2.7210e-02, PNorm = 137.7685, GNorm = 0.2700, lr_0 = 6.3874e-04
Loss = 2.6569e-02, PNorm = 137.8207, GNorm = 0.2998, lr_0 = 6.3831e-04
Loss = 2.3296e-02, PNorm = 137.8739, GNorm = 0.2284, lr_0 = 6.3787e-04
Loss = 2.8234e-02, PNorm = 137.9257, GNorm = 0.5910, lr_0 = 6.3743e-04
Loss = 2.6140e-02, PNorm = 137.9808, GNorm = 0.2140, lr_0 = 6.3700e-04
Loss = 2.7635e-02, PNorm = 138.0309, GNorm = 0.1778, lr_0 = 6.3656e-04
Loss = 2.2920e-02, PNorm = 138.0828, GNorm = 0.2945, lr_0 = 6.3612e-04
Loss = 2.8350e-02, PNorm = 138.1256, GNorm = 0.5590, lr_0 = 6.3569e-04
Loss = 2.4282e-02, PNorm = 138.1819, GNorm = 0.7208, lr_0 = 6.3525e-04
Loss = 2.6436e-02, PNorm = 138.2285, GNorm = 0.4938, lr_0 = 6.3482e-04
Loss = 2.3687e-02, PNorm = 138.2845, GNorm = 0.7560, lr_0 = 6.3438e-04
Loss = 2.3339e-02, PNorm = 138.3317, GNorm = 0.3135, lr_0 = 6.3395e-04
Loss = 3.2177e-02, PNorm = 138.3761, GNorm = 0.2862, lr_0 = 6.3351e-04
Loss = 2.5248e-02, PNorm = 138.4258, GNorm = 0.5848, lr_0 = 6.3308e-04
Loss = 2.2048e-02, PNorm = 138.4768, GNorm = 0.5798, lr_0 = 6.3265e-04
Loss = 2.2651e-02, PNorm = 138.5182, GNorm = 0.2632, lr_0 = 6.3221e-04
Loss = 2.6063e-02, PNorm = 138.5622, GNorm = 0.3423, lr_0 = 6.3178e-04
Loss = 2.5968e-02, PNorm = 138.6152, GNorm = 0.7408, lr_0 = 6.3135e-04
Loss = 2.6411e-02, PNorm = 138.6712, GNorm = 0.2878, lr_0 = 6.3091e-04
Loss = 2.9329e-02, PNorm = 138.7199, GNorm = 0.6236, lr_0 = 6.3048e-04
Loss = 2.6725e-02, PNorm = 138.7761, GNorm = 0.3371, lr_0 = 6.3005e-04
Loss = 2.4786e-02, PNorm = 138.8275, GNorm = 0.3138, lr_0 = 6.2962e-04
Loss = 3.6028e-02, PNorm = 138.8843, GNorm = 0.5640, lr_0 = 6.2919e-04
Loss = 2.8593e-02, PNorm = 138.9383, GNorm = 0.4555, lr_0 = 6.2876e-04
Loss = 2.7797e-02, PNorm = 138.9936, GNorm = 0.4771, lr_0 = 6.2833e-04
Loss = 2.6399e-02, PNorm = 139.0506, GNorm = 0.6832, lr_0 = 6.2789e-04
Loss = 3.2899e-02, PNorm = 139.1079, GNorm = 0.5848, lr_0 = 6.2746e-04
Loss = 2.5922e-02, PNorm = 139.1702, GNorm = 0.3209, lr_0 = 6.2703e-04
Loss = 2.9352e-02, PNorm = 139.2323, GNorm = 0.1719, lr_0 = 6.2661e-04
Loss = 2.6168e-02, PNorm = 139.2902, GNorm = 0.4808, lr_0 = 6.2618e-04
Loss = 2.7780e-02, PNorm = 139.3454, GNorm = 0.3671, lr_0 = 6.2575e-04
Loss = 2.7253e-02, PNorm = 139.4024, GNorm = 0.4014, lr_0 = 6.2532e-04
Loss = 2.5846e-02, PNorm = 139.4611, GNorm = 0.4391, lr_0 = 6.2489e-04
Loss = 2.8081e-02, PNorm = 139.5237, GNorm = 0.7371, lr_0 = 6.2446e-04
Loss = 2.4636e-02, PNorm = 139.5867, GNorm = 0.5644, lr_0 = 6.2403e-04
Loss = 2.6013e-02, PNorm = 139.6437, GNorm = 0.3949, lr_0 = 6.2361e-04
Loss = 3.2225e-02, PNorm = 139.7024, GNorm = 0.6192, lr_0 = 6.2318e-04
Loss = 3.1527e-02, PNorm = 139.7683, GNorm = 0.2081, lr_0 = 6.2275e-04
Loss = 2.5575e-02, PNorm = 139.8234, GNorm = 0.3156, lr_0 = 6.2233e-04
Loss = 2.9221e-02, PNorm = 139.8801, GNorm = 0.6026, lr_0 = 6.2190e-04
Loss = 2.7228e-02, PNorm = 139.9314, GNorm = 0.2060, lr_0 = 6.2147e-04
Loss = 3.2790e-02, PNorm = 139.9963, GNorm = 0.5393, lr_0 = 6.2105e-04
Loss = 3.1052e-02, PNorm = 140.0555, GNorm = 0.4296, lr_0 = 6.2062e-04
Loss = 2.7739e-02, PNorm = 140.1196, GNorm = 0.7304, lr_0 = 6.2020e-04
Loss = 3.0385e-02, PNorm = 140.1767, GNorm = 0.2018, lr_0 = 6.1977e-04
Loss = 3.0558e-02, PNorm = 140.2458, GNorm = 0.2293, lr_0 = 6.1935e-04
Loss = 3.2711e-02, PNorm = 140.3116, GNorm = 0.4109, lr_0 = 6.1892e-04
Loss = 2.7038e-02, PNorm = 140.3733, GNorm = 0.2352, lr_0 = 6.1850e-04
Loss = 3.1873e-02, PNorm = 140.4297, GNorm = 0.4108, lr_0 = 6.1808e-04
Loss = 3.9219e-02, PNorm = 140.4958, GNorm = 0.4885, lr_0 = 6.1765e-04
Loss = 3.3217e-02, PNorm = 140.5566, GNorm = 0.7371, lr_0 = 6.1723e-04
Loss = 2.9349e-02, PNorm = 140.6222, GNorm = 0.2324, lr_0 = 6.1681e-04
Loss = 2.5197e-02, PNorm = 140.6826, GNorm = 0.2829, lr_0 = 6.1638e-04
Loss = 2.6179e-02, PNorm = 140.7386, GNorm = 0.2431, lr_0 = 6.1596e-04
Loss = 3.2222e-02, PNorm = 140.8009, GNorm = 0.2353, lr_0 = 6.1554e-04
Loss = 2.6355e-02, PNorm = 140.8590, GNorm = 0.5258, lr_0 = 6.1512e-04
Loss = 2.6128e-02, PNorm = 140.9137, GNorm = 0.6667, lr_0 = 6.1470e-04
Loss = 2.8797e-02, PNorm = 140.9670, GNorm = 0.2452, lr_0 = 6.1428e-04
Loss = 2.6548e-02, PNorm = 141.0169, GNorm = 0.3208, lr_0 = 6.1385e-04
Loss = 2.5807e-02, PNorm = 141.0695, GNorm = 0.6422, lr_0 = 6.1343e-04
Loss = 2.7783e-02, PNorm = 141.1279, GNorm = 0.4831, lr_0 = 6.1301e-04
Loss = 3.0977e-02, PNorm = 141.1918, GNorm = 0.7542, lr_0 = 6.1259e-04
Loss = 3.5501e-02, PNorm = 141.2538, GNorm = 0.3471, lr_0 = 6.1217e-04
Loss = 3.2141e-02, PNorm = 141.3154, GNorm = 0.4649, lr_0 = 6.1175e-04
Loss = 3.1979e-02, PNorm = 141.3832, GNorm = 0.4546, lr_0 = 6.1134e-04
Loss = 3.0148e-02, PNorm = 141.4483, GNorm = 0.2600, lr_0 = 6.1092e-04
Loss = 2.9696e-02, PNorm = 141.5116, GNorm = 0.5144, lr_0 = 6.1050e-04
Validation mae = 0.284655
Epoch 8
Loss = 2.2782e-02, PNorm = 141.5609, GNorm = 0.2286, lr_0 = 6.1008e-04
Loss = 2.5764e-02, PNorm = 141.5981, GNorm = 0.5341, lr_0 = 6.0966e-04
Loss = 2.2333e-02, PNorm = 141.6393, GNorm = 0.8666, lr_0 = 6.0924e-04
Loss = 2.5920e-02, PNorm = 141.6788, GNorm = 0.2745, lr_0 = 6.0883e-04
Loss = 2.2084e-02, PNorm = 141.7240, GNorm = 0.4291, lr_0 = 6.0841e-04
Loss = 2.1129e-02, PNorm = 141.7619, GNorm = 0.2495, lr_0 = 6.0799e-04
Loss = 1.9603e-02, PNorm = 141.8011, GNorm = 0.1287, lr_0 = 6.0758e-04
Loss = 2.5143e-02, PNorm = 141.8326, GNorm = 0.2277, lr_0 = 6.0716e-04
Loss = 2.4612e-02, PNorm = 141.8629, GNorm = 0.3218, lr_0 = 6.0674e-04
Loss = 2.1406e-02, PNorm = 141.8949, GNorm = 0.2166, lr_0 = 6.0633e-04
Loss = 2.3054e-02, PNorm = 141.9312, GNorm = 0.2653, lr_0 = 6.0591e-04
Loss = 2.0357e-02, PNorm = 141.9690, GNorm = 0.7173, lr_0 = 6.0550e-04
Loss = 2.0703e-02, PNorm = 142.0018, GNorm = 0.6052, lr_0 = 6.0508e-04
Loss = 2.0811e-02, PNorm = 142.0405, GNorm = 0.3569, lr_0 = 6.0467e-04
Loss = 2.2413e-02, PNorm = 142.0796, GNorm = 0.3689, lr_0 = 6.0425e-04
Loss = 1.8675e-02, PNorm = 142.1126, GNorm = 0.5331, lr_0 = 6.0384e-04
Loss = 2.0669e-02, PNorm = 142.1439, GNorm = 0.2470, lr_0 = 6.0343e-04
Loss = 2.2510e-02, PNorm = 142.1781, GNorm = 0.2467, lr_0 = 6.0301e-04
Loss = 2.0105e-02, PNorm = 142.2188, GNorm = 0.2561, lr_0 = 6.0260e-04
Loss = 2.1649e-02, PNorm = 142.2527, GNorm = 0.3564, lr_0 = 6.0219e-04
Loss = 1.9113e-02, PNorm = 142.2903, GNorm = 0.1869, lr_0 = 6.0178e-04
Loss = 1.7585e-02, PNorm = 142.3297, GNorm = 0.3207, lr_0 = 6.0136e-04
Loss = 1.9188e-02, PNorm = 142.3635, GNorm = 0.5810, lr_0 = 6.0095e-04
Loss = 1.8474e-02, PNorm = 142.4033, GNorm = 0.3659, lr_0 = 6.0054e-04
Loss = 2.3089e-02, PNorm = 142.4392, GNorm = 0.2063, lr_0 = 6.0013e-04
Loss = 1.8990e-02, PNorm = 142.4699, GNorm = 0.3537, lr_0 = 5.9972e-04
Loss = 1.9787e-02, PNorm = 142.5036, GNorm = 0.2199, lr_0 = 5.9931e-04
Loss = 1.6993e-02, PNorm = 142.5399, GNorm = 0.1787, lr_0 = 5.9890e-04
Loss = 2.3499e-02, PNorm = 142.5689, GNorm = 0.3548, lr_0 = 5.9849e-04
Loss = 2.0158e-02, PNorm = 142.6072, GNorm = 0.4398, lr_0 = 5.9808e-04
Loss = 3.0264e-02, PNorm = 142.6409, GNorm = 0.7474, lr_0 = 5.9767e-04
Loss = 1.8895e-02, PNorm = 142.6860, GNorm = 0.3673, lr_0 = 5.9726e-04
Loss = 2.1867e-02, PNorm = 142.7303, GNorm = 0.2867, lr_0 = 5.9685e-04
Loss = 1.7901e-02, PNorm = 142.7743, GNorm = 0.1803, lr_0 = 5.9644e-04
Loss = 1.8440e-02, PNorm = 142.8160, GNorm = 0.5211, lr_0 = 5.9603e-04
Loss = 1.9798e-02, PNorm = 142.8562, GNorm = 0.4047, lr_0 = 5.9562e-04
Loss = 2.4400e-02, PNorm = 142.9027, GNorm = 0.3226, lr_0 = 5.9521e-04
Loss = 2.2619e-02, PNorm = 142.9483, GNorm = 0.3160, lr_0 = 5.9481e-04
Loss = 1.9808e-02, PNorm = 142.9936, GNorm = 0.6455, lr_0 = 5.9440e-04
Loss = 1.9071e-02, PNorm = 143.0366, GNorm = 0.3339, lr_0 = 5.9399e-04
Loss = 1.9831e-02, PNorm = 143.0790, GNorm = 0.1981, lr_0 = 5.9358e-04
Loss = 2.1544e-02, PNorm = 143.1214, GNorm = 0.6301, lr_0 = 5.9318e-04
Loss = 2.2426e-02, PNorm = 143.1644, GNorm = 0.3420, lr_0 = 5.9277e-04
Loss = 2.6504e-02, PNorm = 143.2016, GNorm = 0.7851, lr_0 = 5.9236e-04
Loss = 2.1341e-02, PNorm = 143.2494, GNorm = 0.2690, lr_0 = 5.9196e-04
Loss = 2.1584e-02, PNorm = 143.2922, GNorm = 0.3042, lr_0 = 5.9155e-04
Loss = 1.9390e-02, PNorm = 143.3371, GNorm = 0.2480, lr_0 = 5.9115e-04
Loss = 1.8875e-02, PNorm = 143.3831, GNorm = 0.1893, lr_0 = 5.9074e-04
Loss = 2.1938e-02, PNorm = 143.4338, GNorm = 0.5743, lr_0 = 5.9034e-04
Loss = 1.8158e-02, PNorm = 143.4743, GNorm = 0.6795, lr_0 = 5.8993e-04
Loss = 2.2547e-02, PNorm = 143.5178, GNorm = 0.1860, lr_0 = 5.8953e-04
Loss = 2.1825e-02, PNorm = 143.5635, GNorm = 0.4618, lr_0 = 5.8913e-04
Loss = 2.2324e-02, PNorm = 143.6080, GNorm = 0.2561, lr_0 = 5.8872e-04
Loss = 2.1435e-02, PNorm = 143.6452, GNorm = 0.4957, lr_0 = 5.8832e-04
Loss = 2.0244e-02, PNorm = 143.6874, GNorm = 0.3077, lr_0 = 5.8792e-04
Loss = 2.1101e-02, PNorm = 143.7319, GNorm = 0.2903, lr_0 = 5.8751e-04
Loss = 2.0016e-02, PNorm = 143.7772, GNorm = 0.5952, lr_0 = 5.8711e-04
Loss = 2.0580e-02, PNorm = 143.8187, GNorm = 0.3793, lr_0 = 5.8671e-04
Loss = 2.3821e-02, PNorm = 143.8608, GNorm = 0.4202, lr_0 = 5.8631e-04
Loss = 2.1711e-02, PNorm = 143.9097, GNorm = 0.1742, lr_0 = 5.8591e-04
Loss = 2.0824e-02, PNorm = 143.9564, GNorm = 0.7273, lr_0 = 5.8550e-04
Loss = 2.0291e-02, PNorm = 144.0007, GNorm = 0.1612, lr_0 = 5.8510e-04
Loss = 1.8250e-02, PNorm = 144.0466, GNorm = 0.2233, lr_0 = 5.8470e-04
Loss = 2.1274e-02, PNorm = 144.0896, GNorm = 0.4511, lr_0 = 5.8430e-04
Loss = 2.1805e-02, PNorm = 144.1283, GNorm = 0.2083, lr_0 = 5.8390e-04
Loss = 2.2094e-02, PNorm = 144.1758, GNorm = 0.4548, lr_0 = 5.8350e-04
Loss = 1.8574e-02, PNorm = 144.2252, GNorm = 0.3647, lr_0 = 5.8310e-04
Loss = 1.8344e-02, PNorm = 144.2732, GNorm = 0.2456, lr_0 = 5.8270e-04
Loss = 2.0261e-02, PNorm = 144.3153, GNorm = 0.1886, lr_0 = 5.8230e-04
Loss = 2.0866e-02, PNorm = 144.3552, GNorm = 0.5482, lr_0 = 5.8190e-04
Loss = 2.2360e-02, PNorm = 144.4004, GNorm = 0.9377, lr_0 = 5.8151e-04
Loss = 1.9089e-02, PNorm = 144.4432, GNorm = 0.3243, lr_0 = 5.8111e-04
Loss = 2.3418e-02, PNorm = 144.4935, GNorm = 0.3299, lr_0 = 5.8071e-04
Loss = 2.2627e-02, PNorm = 144.5426, GNorm = 0.2468, lr_0 = 5.8031e-04
Loss = 2.0135e-02, PNorm = 144.5942, GNorm = 0.5159, lr_0 = 5.7991e-04
Loss = 1.6269e-02, PNorm = 144.6383, GNorm = 0.3646, lr_0 = 5.7952e-04
Loss = 2.0411e-02, PNorm = 144.6768, GNorm = 0.3469, lr_0 = 5.7912e-04
Loss = 2.2430e-02, PNorm = 144.7227, GNorm = 0.2524, lr_0 = 5.7872e-04
Loss = 2.1485e-02, PNorm = 144.7734, GNorm = 0.4887, lr_0 = 5.7833e-04
Loss = 2.0625e-02, PNorm = 144.8209, GNorm = 0.5490, lr_0 = 5.7793e-04
Loss = 2.2205e-02, PNorm = 144.8658, GNorm = 0.4206, lr_0 = 5.7753e-04
Loss = 2.1790e-02, PNorm = 144.9147, GNorm = 0.2642, lr_0 = 5.7714e-04
Loss = 2.0721e-02, PNorm = 144.9630, GNorm = 0.4628, lr_0 = 5.7674e-04
Loss = 2.1253e-02, PNorm = 145.0085, GNorm = 0.3601, lr_0 = 5.7635e-04
Loss = 2.0542e-02, PNorm = 145.0562, GNorm = 0.3268, lr_0 = 5.7595e-04
Loss = 2.1541e-02, PNorm = 145.0980, GNorm = 0.4619, lr_0 = 5.7556e-04
Loss = 2.0807e-02, PNorm = 145.1342, GNorm = 0.4176, lr_0 = 5.7516e-04
Loss = 1.9598e-02, PNorm = 145.1748, GNorm = 0.2016, lr_0 = 5.7477e-04
Loss = 2.2044e-02, PNorm = 145.2175, GNorm = 0.3428, lr_0 = 5.7438e-04
Loss = 2.8815e-02, PNorm = 145.2671, GNorm = 0.8618, lr_0 = 5.7398e-04
Loss = 2.1515e-02, PNorm = 145.3203, GNorm = 0.5990, lr_0 = 5.7359e-04
Loss = 2.4259e-02, PNorm = 145.3765, GNorm = 0.4759, lr_0 = 5.7320e-04
Loss = 2.0341e-02, PNorm = 145.4304, GNorm = 0.3477, lr_0 = 5.7280e-04
Loss = 2.4660e-02, PNorm = 145.4865, GNorm = 0.7488, lr_0 = 5.7241e-04
Loss = 2.2884e-02, PNorm = 145.5385, GNorm = 0.1633, lr_0 = 5.7202e-04
Loss = 2.8887e-02, PNorm = 145.5954, GNorm = 0.6361, lr_0 = 5.7163e-04
Loss = 1.9781e-02, PNorm = 145.6480, GNorm = 0.1752, lr_0 = 5.7124e-04
Loss = 2.3193e-02, PNorm = 145.6963, GNorm = 0.2665, lr_0 = 5.7084e-04
Loss = 1.9137e-02, PNorm = 145.7446, GNorm = 0.3899, lr_0 = 5.7045e-04
Loss = 2.2285e-02, PNorm = 145.7926, GNorm = 0.2040, lr_0 = 5.7006e-04
Loss = 2.0667e-02, PNorm = 145.8388, GNorm = 0.4568, lr_0 = 5.6967e-04
Loss = 2.5619e-02, PNorm = 145.8834, GNorm = 0.3312, lr_0 = 5.6928e-04
Loss = 2.1909e-02, PNorm = 145.9312, GNorm = 0.2210, lr_0 = 5.6889e-04
Loss = 2.5271e-02, PNorm = 145.9837, GNorm = 0.2683, lr_0 = 5.6850e-04
Loss = 2.5382e-02, PNorm = 146.0412, GNorm = 0.4071, lr_0 = 5.6811e-04
Loss = 2.6595e-02, PNorm = 146.0955, GNorm = 1.1605, lr_0 = 5.6772e-04
Loss = 2.6915e-02, PNorm = 146.1497, GNorm = 0.2648, lr_0 = 5.6733e-04
Loss = 2.5310e-02, PNorm = 146.1994, GNorm = 0.4120, lr_0 = 5.6695e-04
Loss = 2.5365e-02, PNorm = 146.2545, GNorm = 0.4611, lr_0 = 5.6656e-04
Loss = 2.4699e-02, PNorm = 146.3047, GNorm = 0.3646, lr_0 = 5.6617e-04
Loss = 2.0229e-02, PNorm = 146.3515, GNorm = 0.4021, lr_0 = 5.6578e-04
Loss = 1.8420e-02, PNorm = 146.4012, GNorm = 0.4416, lr_0 = 5.6539e-04
Loss = 2.1759e-02, PNorm = 146.4495, GNorm = 0.3105, lr_0 = 5.6501e-04
Loss = 2.5625e-02, PNorm = 146.5028, GNorm = 0.3078, lr_0 = 5.6462e-04
Loss = 2.3457e-02, PNorm = 146.5562, GNorm = 0.4558, lr_0 = 5.6423e-04
Loss = 2.3107e-02, PNorm = 146.6108, GNorm = 0.2136, lr_0 = 5.6385e-04
Loss = 2.1537e-02, PNorm = 146.6688, GNorm = 0.1870, lr_0 = 5.6346e-04
Loss = 2.3150e-02, PNorm = 146.7234, GNorm = 0.4310, lr_0 = 5.6307e-04
Loss = 2.1860e-02, PNorm = 146.7724, GNorm = 0.2036, lr_0 = 5.6269e-04
Loss = 2.9278e-02, PNorm = 146.8226, GNorm = 0.2677, lr_0 = 5.6230e-04
Validation mae = 0.283974
Epoch 9
Loss = 1.9909e-02, PNorm = 146.8708, GNorm = 0.3564, lr_0 = 5.6192e-04
Loss = 1.7904e-02, PNorm = 146.9117, GNorm = 0.1460, lr_0 = 5.6153e-04
Loss = 1.7782e-02, PNorm = 146.9435, GNorm = 0.1766, lr_0 = 5.6115e-04
Loss = 1.7811e-02, PNorm = 146.9780, GNorm = 0.3867, lr_0 = 5.6076e-04
Loss = 2.3441e-02, PNorm = 147.0078, GNorm = 0.1837, lr_0 = 5.6038e-04
Loss = 1.9322e-02, PNorm = 147.0410, GNorm = 0.7207, lr_0 = 5.6000e-04
Loss = 1.6853e-02, PNorm = 147.0706, GNorm = 0.2461, lr_0 = 5.5961e-04
Loss = 1.7866e-02, PNorm = 147.1017, GNorm = 0.1602, lr_0 = 5.5923e-04
Loss = 1.7736e-02, PNorm = 147.1355, GNorm = 0.3203, lr_0 = 5.5885e-04
Loss = 1.7485e-02, PNorm = 147.1668, GNorm = 0.5267, lr_0 = 5.5846e-04
Loss = 1.8727e-02, PNorm = 147.1974, GNorm = 0.6777, lr_0 = 5.5808e-04
Loss = 1.8230e-02, PNorm = 147.2324, GNorm = 1.1421, lr_0 = 5.5770e-04
Loss = 1.6334e-02, PNorm = 147.2657, GNorm = 0.3585, lr_0 = 5.5732e-04
Loss = 1.9151e-02, PNorm = 147.2992, GNorm = 0.3028, lr_0 = 5.5693e-04
Loss = 1.5695e-02, PNorm = 147.3351, GNorm = 0.2327, lr_0 = 5.5655e-04
Loss = 2.0291e-02, PNorm = 147.3692, GNorm = 0.2073, lr_0 = 5.5617e-04
Loss = 1.8618e-02, PNorm = 147.3977, GNorm = 0.2350, lr_0 = 5.5579e-04
Loss = 1.5242e-02, PNorm = 147.4310, GNorm = 0.4841, lr_0 = 5.5541e-04
Loss = 1.5852e-02, PNorm = 147.4655, GNorm = 0.4543, lr_0 = 5.5503e-04
Loss = 1.9275e-02, PNorm = 147.4982, GNorm = 0.2534, lr_0 = 5.5465e-04
Loss = 1.6633e-02, PNorm = 147.5336, GNorm = 0.2570, lr_0 = 5.5427e-04
Loss = 1.9812e-02, PNorm = 147.5696, GNorm = 0.6795, lr_0 = 5.5389e-04
Loss = 1.6793e-02, PNorm = 147.6094, GNorm = 0.4393, lr_0 = 5.5351e-04
Loss = 1.9478e-02, PNorm = 147.6483, GNorm = 0.5877, lr_0 = 5.5313e-04
Loss = 1.7129e-02, PNorm = 147.6812, GNorm = 0.1601, lr_0 = 5.5275e-04
Loss = 2.0048e-02, PNorm = 147.7104, GNorm = 0.2079, lr_0 = 5.5237e-04
Loss = 2.0402e-02, PNorm = 147.7462, GNorm = 0.2349, lr_0 = 5.5199e-04
Loss = 1.5676e-02, PNorm = 147.7797, GNorm = 0.1873, lr_0 = 5.5162e-04
Loss = 1.7398e-02, PNorm = 147.8138, GNorm = 0.2798, lr_0 = 5.5124e-04
Loss = 1.8840e-02, PNorm = 147.8516, GNorm = 0.1161, lr_0 = 5.5086e-04
Loss = 1.9579e-02, PNorm = 147.8841, GNorm = 0.4074, lr_0 = 5.5048e-04
Loss = 1.6550e-02, PNorm = 147.9192, GNorm = 0.2797, lr_0 = 5.5011e-04
Loss = 1.8072e-02, PNorm = 147.9558, GNorm = 0.2909, lr_0 = 5.4973e-04
Loss = 1.8624e-02, PNorm = 147.9941, GNorm = 0.3982, lr_0 = 5.4935e-04
Loss = 1.7175e-02, PNorm = 148.0276, GNorm = 0.5090, lr_0 = 5.4898e-04
Loss = 1.7129e-02, PNorm = 148.0657, GNorm = 0.6540, lr_0 = 5.4860e-04
Loss = 1.3750e-02, PNorm = 148.1037, GNorm = 0.2661, lr_0 = 5.4822e-04
Loss = 1.6213e-02, PNorm = 148.1351, GNorm = 0.9688, lr_0 = 5.4785e-04
Loss = 2.0707e-02, PNorm = 148.1727, GNorm = 0.7024, lr_0 = 5.4747e-04
Loss = 1.6570e-02, PNorm = 148.2086, GNorm = 0.4731, lr_0 = 5.4710e-04
Loss = 1.7911e-02, PNorm = 148.2490, GNorm = 0.3070, lr_0 = 5.4672e-04
Loss = 1.6238e-02, PNorm = 148.2855, GNorm = 0.5077, lr_0 = 5.4635e-04
Loss = 1.3988e-02, PNorm = 148.3222, GNorm = 0.1713, lr_0 = 5.4597e-04
Loss = 1.7506e-02, PNorm = 148.3584, GNorm = 0.4020, lr_0 = 5.4560e-04
Loss = 1.8828e-02, PNorm = 148.3894, GNorm = 0.3487, lr_0 = 5.4523e-04
Loss = 1.5146e-02, PNorm = 148.4228, GNorm = 0.5569, lr_0 = 5.4485e-04
Loss = 1.6820e-02, PNorm = 148.4565, GNorm = 0.4595, lr_0 = 5.4448e-04
Loss = 1.5577e-02, PNorm = 148.4859, GNorm = 0.5613, lr_0 = 5.4411e-04
Loss = 1.9461e-02, PNorm = 148.5185, GNorm = 0.1553, lr_0 = 5.4373e-04
Loss = 1.5730e-02, PNorm = 148.5554, GNorm = 0.2916, lr_0 = 5.4336e-04
Loss = 2.0138e-02, PNorm = 148.5871, GNorm = 0.4595, lr_0 = 5.4299e-04
Loss = 1.8655e-02, PNorm = 148.6283, GNorm = 0.5922, lr_0 = 5.4262e-04
Loss = 2.0863e-02, PNorm = 148.6726, GNorm = 0.1669, lr_0 = 5.4225e-04
Loss = 1.7461e-02, PNorm = 148.7109, GNorm = 0.4881, lr_0 = 5.4187e-04
Loss = 1.4497e-02, PNorm = 148.7459, GNorm = 0.1530, lr_0 = 5.4150e-04
Loss = 1.6993e-02, PNorm = 148.7789, GNorm = 0.3965, lr_0 = 5.4113e-04
Loss = 1.5725e-02, PNorm = 148.8141, GNorm = 0.1918, lr_0 = 5.4076e-04
Loss = 1.6206e-02, PNorm = 148.8501, GNorm = 0.2624, lr_0 = 5.4039e-04
Loss = 2.1655e-02, PNorm = 148.8915, GNorm = 0.3511, lr_0 = 5.4002e-04
Loss = 1.7021e-02, PNorm = 148.9345, GNorm = 0.2742, lr_0 = 5.3965e-04
Loss = 1.7935e-02, PNorm = 148.9712, GNorm = 0.1634, lr_0 = 5.3928e-04
Loss = 1.9644e-02, PNorm = 149.0074, GNorm = 0.1334, lr_0 = 5.3891e-04
Loss = 1.4480e-02, PNorm = 149.0402, GNorm = 0.3152, lr_0 = 5.3854e-04
Loss = 1.6144e-02, PNorm = 149.0782, GNorm = 0.1671, lr_0 = 5.3817e-04
Loss = 1.6361e-02, PNorm = 149.1138, GNorm = 0.4990, lr_0 = 5.3781e-04
Loss = 1.7274e-02, PNorm = 149.1504, GNorm = 0.5342, lr_0 = 5.3744e-04
Loss = 1.9569e-02, PNorm = 149.1880, GNorm = 0.4193, lr_0 = 5.3707e-04
Loss = 1.7452e-02, PNorm = 149.2259, GNorm = 0.6016, lr_0 = 5.3670e-04
Loss = 1.8547e-02, PNorm = 149.2628, GNorm = 0.8830, lr_0 = 5.3633e-04
Loss = 1.7767e-02, PNorm = 149.3025, GNorm = 0.2691, lr_0 = 5.3597e-04
Loss = 1.7650e-02, PNorm = 149.3315, GNorm = 0.6148, lr_0 = 5.3560e-04
Loss = 1.5550e-02, PNorm = 149.3733, GNorm = 0.5230, lr_0 = 5.3523e-04
Loss = 1.6109e-02, PNorm = 149.4128, GNorm = 0.6567, lr_0 = 5.3486e-04
Loss = 1.9675e-02, PNorm = 149.4528, GNorm = 0.1817, lr_0 = 5.3450e-04
Loss = 1.8035e-02, PNorm = 149.4918, GNorm = 0.7243, lr_0 = 5.3413e-04
Loss = 1.8209e-02, PNorm = 149.5270, GNorm = 0.5539, lr_0 = 5.3377e-04
Loss = 2.0569e-02, PNorm = 149.5655, GNorm = 0.9471, lr_0 = 5.3340e-04
Loss = 2.0657e-02, PNorm = 149.6061, GNorm = 0.8643, lr_0 = 5.3304e-04
Loss = 1.5841e-02, PNorm = 149.6442, GNorm = 0.3869, lr_0 = 5.3267e-04
Loss = 1.6510e-02, PNorm = 149.6831, GNorm = 0.5813, lr_0 = 5.3231e-04
Loss = 1.4531e-02, PNorm = 149.7220, GNorm = 0.1565, lr_0 = 5.3194e-04
Loss = 1.7329e-02, PNorm = 149.7612, GNorm = 0.2975, lr_0 = 5.3158e-04
Loss = 1.8082e-02, PNorm = 149.7958, GNorm = 0.1495, lr_0 = 5.3121e-04
Loss = 1.7167e-02, PNorm = 149.8316, GNorm = 0.3396, lr_0 = 5.3085e-04
Loss = 1.5177e-02, PNorm = 149.8701, GNorm = 0.2794, lr_0 = 5.3048e-04
Loss = 1.7026e-02, PNorm = 149.9084, GNorm = 0.2141, lr_0 = 5.3012e-04
Loss = 2.0622e-02, PNorm = 149.9453, GNorm = 0.5513, lr_0 = 5.2976e-04
Loss = 1.7205e-02, PNorm = 149.9828, GNorm = 0.4238, lr_0 = 5.2939e-04
Loss = 1.5750e-02, PNorm = 150.0198, GNorm = 0.3716, lr_0 = 5.2903e-04
Loss = 1.5910e-02, PNorm = 150.0569, GNorm = 0.2768, lr_0 = 5.2867e-04
Loss = 1.5751e-02, PNorm = 150.0921, GNorm = 0.3639, lr_0 = 5.2831e-04
Loss = 1.5457e-02, PNorm = 150.1270, GNorm = 0.1669, lr_0 = 5.2795e-04
Loss = 1.4882e-02, PNorm = 150.1580, GNorm = 0.2051, lr_0 = 5.2758e-04
Loss = 1.8234e-02, PNorm = 150.1907, GNorm = 0.4014, lr_0 = 5.2722e-04
Loss = 1.8119e-02, PNorm = 150.2249, GNorm = 0.2130, lr_0 = 5.2686e-04
Loss = 1.5245e-02, PNorm = 150.2653, GNorm = 0.4848, lr_0 = 5.2650e-04
Loss = 1.7293e-02, PNorm = 150.3038, GNorm = 0.4948, lr_0 = 5.2614e-04
Loss = 1.7713e-02, PNorm = 150.3423, GNorm = 0.4342, lr_0 = 5.2578e-04
Loss = 1.7055e-02, PNorm = 150.3795, GNorm = 0.3577, lr_0 = 5.2542e-04
Loss = 1.6026e-02, PNorm = 150.4216, GNorm = 0.3916, lr_0 = 5.2506e-04
Loss = 1.6760e-02, PNorm = 150.4554, GNorm = 0.4018, lr_0 = 5.2470e-04
Loss = 1.6244e-02, PNorm = 150.4975, GNorm = 0.3006, lr_0 = 5.2434e-04
Loss = 1.7662e-02, PNorm = 150.5339, GNorm = 0.2016, lr_0 = 5.2398e-04
Loss = 2.0409e-02, PNorm = 150.5678, GNorm = 0.3082, lr_0 = 5.2362e-04
Loss = 2.0100e-02, PNorm = 150.6006, GNorm = 0.6371, lr_0 = 5.2326e-04
Loss = 1.8157e-02, PNorm = 150.6402, GNorm = 0.8141, lr_0 = 5.2290e-04
Loss = 1.5958e-02, PNorm = 150.6765, GNorm = 0.4225, lr_0 = 5.2255e-04
Loss = 1.6777e-02, PNorm = 150.7155, GNorm = 0.3764, lr_0 = 5.2219e-04
Loss = 1.7127e-02, PNorm = 150.7553, GNorm = 0.2676, lr_0 = 5.2183e-04
Loss = 1.6893e-02, PNorm = 150.7925, GNorm = 0.2006, lr_0 = 5.2147e-04
Loss = 1.6486e-02, PNorm = 150.8308, GNorm = 0.5761, lr_0 = 5.2112e-04
Loss = 1.8161e-02, PNorm = 150.8735, GNorm = 0.3401, lr_0 = 5.2076e-04
Loss = 2.1030e-02, PNorm = 150.9137, GNorm = 0.6287, lr_0 = 5.2040e-04
Loss = 1.5840e-02, PNorm = 150.9512, GNorm = 0.2301, lr_0 = 5.2005e-04
Loss = 2.1919e-02, PNorm = 150.9876, GNorm = 0.3189, lr_0 = 5.1969e-04
Loss = 1.6675e-02, PNorm = 151.0220, GNorm = 0.3800, lr_0 = 5.1933e-04
Loss = 1.7611e-02, PNorm = 151.0624, GNorm = 0.2256, lr_0 = 5.1898e-04
Loss = 1.6100e-02, PNorm = 151.1011, GNorm = 0.2139, lr_0 = 5.1862e-04
Loss = 1.7193e-02, PNorm = 151.1391, GNorm = 0.3102, lr_0 = 5.1827e-04
Loss = 1.6932e-02, PNorm = 151.1754, GNorm = 0.2284, lr_0 = 5.1791e-04
Validation mae = 0.282484
Epoch 10
Loss = 1.3388e-02, PNorm = 151.2010, GNorm = 0.1983, lr_0 = 5.1756e-04
Loss = 1.4704e-02, PNorm = 151.2290, GNorm = 0.4111, lr_0 = 5.1720e-04
Loss = 1.4869e-02, PNorm = 151.2574, GNorm = 0.1725, lr_0 = 5.1685e-04
Loss = 1.4919e-02, PNorm = 151.2852, GNorm = 0.4672, lr_0 = 5.1649e-04
Loss = 1.5333e-02, PNorm = 151.3098, GNorm = 0.4450, lr_0 = 5.1614e-04
Loss = 1.5441e-02, PNorm = 151.3407, GNorm = 0.6166, lr_0 = 5.1579e-04
Loss = 1.4008e-02, PNorm = 151.3705, GNorm = 0.4896, lr_0 = 5.1543e-04
Loss = 1.3501e-02, PNorm = 151.3981, GNorm = 0.1241, lr_0 = 5.1508e-04
Loss = 1.3651e-02, PNorm = 151.4198, GNorm = 0.1564, lr_0 = 5.1473e-04
Loss = 1.6244e-02, PNorm = 151.4437, GNorm = 0.2605, lr_0 = 5.1437e-04
Loss = 1.3672e-02, PNorm = 151.4729, GNorm = 0.3687, lr_0 = 5.1402e-04
Loss = 1.5296e-02, PNorm = 151.5038, GNorm = 0.2712, lr_0 = 5.1367e-04
Loss = 1.4987e-02, PNorm = 151.5341, GNorm = 0.2242, lr_0 = 5.1332e-04
Loss = 1.4085e-02, PNorm = 151.5645, GNorm = 0.4373, lr_0 = 5.1297e-04
Loss = 1.2714e-02, PNorm = 151.5878, GNorm = 0.3293, lr_0 = 5.1262e-04
Loss = 1.8288e-02, PNorm = 151.6150, GNorm = 0.5917, lr_0 = 5.1226e-04
Loss = 1.7360e-02, PNorm = 151.6384, GNorm = 0.6897, lr_0 = 5.1191e-04
Loss = 1.5025e-02, PNorm = 151.6664, GNorm = 0.2510, lr_0 = 5.1156e-04
Loss = 1.5864e-02, PNorm = 151.6900, GNorm = 0.1818, lr_0 = 5.1121e-04
Loss = 1.5667e-02, PNorm = 151.7130, GNorm = 0.2013, lr_0 = 5.1086e-04
Loss = 1.5953e-02, PNorm = 151.7364, GNorm = 0.2601, lr_0 = 5.1051e-04
Loss = 1.3697e-02, PNorm = 151.7664, GNorm = 0.2140, lr_0 = 5.1016e-04
Loss = 1.2477e-02, PNorm = 151.7950, GNorm = 0.3528, lr_0 = 5.0981e-04
Loss = 1.2993e-02, PNorm = 151.8208, GNorm = 0.1334, lr_0 = 5.0946e-04
Loss = 1.3969e-02, PNorm = 151.8484, GNorm = 0.2535, lr_0 = 5.0911e-04
Loss = 1.2838e-02, PNorm = 151.8779, GNorm = 0.5328, lr_0 = 5.0877e-04
Loss = 1.3405e-02, PNorm = 151.9067, GNorm = 0.3097, lr_0 = 5.0842e-04
Loss = 1.4268e-02, PNorm = 151.9343, GNorm = 0.2311, lr_0 = 5.0807e-04
Loss = 1.2160e-02, PNorm = 151.9610, GNorm = 0.2935, lr_0 = 5.0772e-04
Loss = 1.2160e-02, PNorm = 151.9869, GNorm = 0.3431, lr_0 = 5.0737e-04
Loss = 1.5799e-02, PNorm = 152.0153, GNorm = 0.7547, lr_0 = 5.0703e-04
Loss = 1.5585e-02, PNorm = 152.0413, GNorm = 0.2521, lr_0 = 5.0668e-04
Loss = 1.3029e-02, PNorm = 152.0741, GNorm = 0.2208, lr_0 = 5.0633e-04
Loss = 1.3889e-02, PNorm = 152.1024, GNorm = 0.5630, lr_0 = 5.0598e-04
Loss = 1.2607e-02, PNorm = 152.1356, GNorm = 0.2609, lr_0 = 5.0564e-04
Loss = 1.5560e-02, PNorm = 152.1670, GNorm = 0.6440, lr_0 = 5.0529e-04
Loss = 1.2541e-02, PNorm = 152.1944, GNorm = 0.3733, lr_0 = 5.0494e-04
Loss = 1.4844e-02, PNorm = 152.2179, GNorm = 0.1070, lr_0 = 5.0460e-04
Loss = 1.3607e-02, PNorm = 152.2472, GNorm = 0.3625, lr_0 = 5.0425e-04
Loss = 1.3360e-02, PNorm = 152.2734, GNorm = 0.1178, lr_0 = 5.0391e-04
Loss = 1.2092e-02, PNorm = 152.3001, GNorm = 0.3522, lr_0 = 5.0356e-04
Loss = 1.5170e-02, PNorm = 152.3300, GNorm = 0.2894, lr_0 = 5.0322e-04
Loss = 1.3245e-02, PNorm = 152.3599, GNorm = 0.4122, lr_0 = 5.0287e-04
Loss = 1.2721e-02, PNorm = 152.3868, GNorm = 0.1167, lr_0 = 5.0253e-04
Loss = 1.3870e-02, PNorm = 152.4074, GNorm = 0.3090, lr_0 = 5.0218e-04
Loss = 1.1417e-02, PNorm = 152.4352, GNorm = 0.1864, lr_0 = 5.0184e-04
Loss = 1.3455e-02, PNorm = 152.4623, GNorm = 0.2339, lr_0 = 5.0150e-04
Loss = 1.3978e-02, PNorm = 152.4897, GNorm = 0.3337, lr_0 = 5.0115e-04
Loss = 1.3434e-02, PNorm = 152.5170, GNorm = 0.1298, lr_0 = 5.0081e-04
Loss = 1.2642e-02, PNorm = 152.5491, GNorm = 0.5642, lr_0 = 5.0047e-04
Loss = 1.3113e-02, PNorm = 152.5793, GNorm = 0.6254, lr_0 = 5.0012e-04
Loss = 1.5759e-02, PNorm = 152.6105, GNorm = 0.5215, lr_0 = 4.9978e-04
Loss = 1.4360e-02, PNorm = 152.6429, GNorm = 0.3356, lr_0 = 4.9944e-04
Loss = 1.4700e-02, PNorm = 152.6720, GNorm = 0.5164, lr_0 = 4.9910e-04
Loss = 1.5942e-02, PNorm = 152.7024, GNorm = 0.4132, lr_0 = 4.9875e-04
Loss = 1.5233e-02, PNorm = 152.7338, GNorm = 0.2783, lr_0 = 4.9841e-04
Loss = 1.4290e-02, PNorm = 152.7693, GNorm = 0.1628, lr_0 = 4.9807e-04
Loss = 1.5394e-02, PNorm = 152.8000, GNorm = 0.4877, lr_0 = 4.9773e-04
Loss = 1.6595e-02, PNorm = 152.8292, GNorm = 0.3702, lr_0 = 4.9739e-04
Loss = 1.3524e-02, PNorm = 152.8640, GNorm = 0.4298, lr_0 = 4.9705e-04
Loss = 1.5445e-02, PNorm = 152.8947, GNorm = 0.3141, lr_0 = 4.9671e-04
Loss = 1.7530e-02, PNorm = 152.9294, GNorm = 0.5760, lr_0 = 4.9637e-04
Loss = 1.6105e-02, PNorm = 152.9626, GNorm = 0.4754, lr_0 = 4.9603e-04
Loss = 1.2899e-02, PNorm = 152.9960, GNorm = 0.3221, lr_0 = 4.9569e-04
Loss = 1.3698e-02, PNorm = 153.0248, GNorm = 0.3944, lr_0 = 4.9535e-04
Loss = 1.4638e-02, PNorm = 153.0567, GNorm = 0.1808, lr_0 = 4.9501e-04
Loss = 1.6595e-02, PNorm = 153.0933, GNorm = 0.4600, lr_0 = 4.9467e-04
Loss = 1.4143e-02, PNorm = 153.1241, GNorm = 0.6902, lr_0 = 4.9433e-04
Loss = 1.1781e-02, PNorm = 153.1582, GNorm = 0.2046, lr_0 = 4.9399e-04
Loss = 1.9394e-02, PNorm = 153.1970, GNorm = 0.3085, lr_0 = 4.9365e-04
Loss = 1.3174e-02, PNorm = 153.2298, GNorm = 0.6937, lr_0 = 4.9332e-04
Loss = 1.4099e-02, PNorm = 153.2626, GNorm = 0.1530, lr_0 = 4.9298e-04
Loss = 1.5210e-02, PNorm = 153.2929, GNorm = 0.2429, lr_0 = 4.9264e-04
Loss = 1.4164e-02, PNorm = 153.3256, GNorm = 0.3937, lr_0 = 4.9230e-04
Loss = 1.5487e-02, PNorm = 153.3615, GNorm = 0.3110, lr_0 = 4.9197e-04
Loss = 1.5496e-02, PNorm = 153.3986, GNorm = 0.2893, lr_0 = 4.9163e-04
Loss = 1.1846e-02, PNorm = 153.4365, GNorm = 0.2508, lr_0 = 4.9129e-04
Loss = 1.3893e-02, PNorm = 153.4680, GNorm = 0.1377, lr_0 = 4.9095e-04
Loss = 1.9201e-02, PNorm = 153.4940, GNorm = 0.1622, lr_0 = 4.9062e-04
Loss = 1.3290e-02, PNorm = 153.5237, GNorm = 0.2947, lr_0 = 4.9028e-04
Loss = 1.4706e-02, PNorm = 153.5559, GNorm = 0.4563, lr_0 = 4.8995e-04
Loss = 1.3489e-02, PNorm = 153.5919, GNorm = 0.3745, lr_0 = 4.8961e-04
Loss = 1.6295e-02, PNorm = 153.6284, GNorm = 0.3095, lr_0 = 4.8928e-04
Loss = 1.2807e-02, PNorm = 153.6607, GNorm = 0.3274, lr_0 = 4.8894e-04
Loss = 1.7199e-02, PNorm = 153.6958, GNorm = 0.7069, lr_0 = 4.8861e-04
Loss = 1.4071e-02, PNorm = 153.7338, GNorm = 0.6181, lr_0 = 4.8827e-04
Loss = 1.3442e-02, PNorm = 153.7642, GNorm = 0.5190, lr_0 = 4.8794e-04
Loss = 1.4069e-02, PNorm = 153.7974, GNorm = 0.2559, lr_0 = 4.8760e-04
Loss = 1.2017e-02, PNorm = 153.8263, GNorm = 0.2085, lr_0 = 4.8727e-04
Loss = 1.4564e-02, PNorm = 153.8585, GNorm = 0.5495, lr_0 = 4.8693e-04
Loss = 1.5277e-02, PNorm = 153.8919, GNorm = 0.3106, lr_0 = 4.8660e-04
Loss = 1.1472e-02, PNorm = 153.9265, GNorm = 0.1865, lr_0 = 4.8627e-04
Loss = 1.2337e-02, PNorm = 153.9531, GNorm = 0.3599, lr_0 = 4.8593e-04
Loss = 1.6757e-02, PNorm = 153.9869, GNorm = 0.2984, lr_0 = 4.8560e-04
Loss = 1.4730e-02, PNorm = 154.0146, GNorm = 0.1835, lr_0 = 4.8527e-04
Loss = 1.4642e-02, PNorm = 154.0523, GNorm = 0.1462, lr_0 = 4.8494e-04
Loss = 1.3397e-02, PNorm = 154.0921, GNorm = 0.1889, lr_0 = 4.8460e-04
Loss = 1.3897e-02, PNorm = 154.1281, GNorm = 0.5324, lr_0 = 4.8427e-04
Loss = 1.5253e-02, PNorm = 154.1640, GNorm = 0.2014, lr_0 = 4.8394e-04
Loss = 1.4548e-02, PNorm = 154.1949, GNorm = 0.4508, lr_0 = 4.8361e-04
Loss = 1.5792e-02, PNorm = 154.2300, GNorm = 0.3142, lr_0 = 4.8328e-04
Loss = 1.2669e-02, PNorm = 154.2596, GNorm = 0.5536, lr_0 = 4.8295e-04
Loss = 1.4018e-02, PNorm = 154.2905, GNorm = 0.2369, lr_0 = 4.8262e-04
Loss = 1.4548e-02, PNorm = 154.3197, GNorm = 0.2774, lr_0 = 4.8228e-04
Loss = 1.4549e-02, PNorm = 154.3524, GNorm = 0.3513, lr_0 = 4.8195e-04
Loss = 1.3977e-02, PNorm = 154.3853, GNorm = 0.1722, lr_0 = 4.8162e-04
Loss = 1.3199e-02, PNorm = 154.4243, GNorm = 0.2590, lr_0 = 4.8129e-04
Loss = 1.4785e-02, PNorm = 154.4593, GNorm = 0.1775, lr_0 = 4.8096e-04
Loss = 1.5682e-02, PNorm = 154.4936, GNorm = 0.4441, lr_0 = 4.8064e-04
Loss = 1.5350e-02, PNorm = 154.5317, GNorm = 0.2342, lr_0 = 4.8031e-04
Loss = 1.2050e-02, PNorm = 154.5670, GNorm = 0.4980, lr_0 = 4.7998e-04
Loss = 1.3801e-02, PNorm = 154.6013, GNorm = 0.4320, lr_0 = 4.7965e-04
Loss = 1.1910e-02, PNorm = 154.6331, GNorm = 0.2483, lr_0 = 4.7932e-04
Loss = 1.4163e-02, PNorm = 154.6689, GNorm = 0.3987, lr_0 = 4.7899e-04
Loss = 1.4213e-02, PNorm = 154.7049, GNorm = 0.1419, lr_0 = 4.7866e-04
Loss = 1.3163e-02, PNorm = 154.7408, GNorm = 0.3723, lr_0 = 4.7833e-04
Loss = 1.4205e-02, PNorm = 154.7735, GNorm = 0.3816, lr_0 = 4.7801e-04
Loss = 1.7766e-02, PNorm = 154.8036, GNorm = 0.5325, lr_0 = 4.7768e-04
Loss = 1.4191e-02, PNorm = 154.8373, GNorm = 0.1424, lr_0 = 4.7735e-04
Loss = 1.4950e-02, PNorm = 154.8709, GNorm = 0.4254, lr_0 = 4.7703e-04
Validation mae = 0.282830
Epoch 11
Loss = 1.4721e-02, PNorm = 154.9028, GNorm = 0.5808, lr_0 = 4.7670e-04
Loss = 1.4540e-02, PNorm = 154.9308, GNorm = 0.4966, lr_0 = 4.7637e-04
Loss = 1.0388e-02, PNorm = 154.9564, GNorm = 0.2135, lr_0 = 4.7605e-04
Loss = 1.2960e-02, PNorm = 154.9801, GNorm = 0.2555, lr_0 = 4.7572e-04
Loss = 1.2736e-02, PNorm = 155.0061, GNorm = 0.6748, lr_0 = 4.7539e-04
Loss = 1.3012e-02, PNorm = 155.0249, GNorm = 0.4500, lr_0 = 4.7507e-04
Loss = 1.3326e-02, PNorm = 155.0464, GNorm = 0.2686, lr_0 = 4.7474e-04
Loss = 1.3680e-02, PNorm = 155.0660, GNorm = 0.1169, lr_0 = 4.7442e-04
Loss = 1.1131e-02, PNorm = 155.0894, GNorm = 0.2160, lr_0 = 4.7409e-04
Loss = 1.2132e-02, PNorm = 155.1132, GNorm = 0.3284, lr_0 = 4.7377e-04
Loss = 1.5036e-02, PNorm = 155.1365, GNorm = 0.2453, lr_0 = 4.7344e-04
Loss = 1.2162e-02, PNorm = 155.1626, GNorm = 0.4859, lr_0 = 4.7312e-04
Loss = 1.1003e-02, PNorm = 155.1839, GNorm = 0.3066, lr_0 = 4.7279e-04
Loss = 1.1687e-02, PNorm = 155.2106, GNorm = 0.2957, lr_0 = 4.7247e-04
Loss = 1.2971e-02, PNorm = 155.2347, GNorm = 0.3600, lr_0 = 4.7215e-04
Loss = 1.6682e-02, PNorm = 155.2606, GNorm = 0.6621, lr_0 = 4.7182e-04
Loss = 1.2074e-02, PNorm = 155.2883, GNorm = 0.1800, lr_0 = 4.7150e-04
Loss = 1.5740e-02, PNorm = 155.3159, GNorm = 0.2146, lr_0 = 4.7118e-04
Loss = 1.4181e-02, PNorm = 155.3413, GNorm = 0.1350, lr_0 = 4.7085e-04
Loss = 1.0750e-02, PNorm = 155.3689, GNorm = 0.2107, lr_0 = 4.7053e-04
Loss = 1.1958e-02, PNorm = 155.3929, GNorm = 0.1716, lr_0 = 4.7021e-04
Loss = 1.1840e-02, PNorm = 155.4129, GNorm = 0.2442, lr_0 = 4.6989e-04
Loss = 1.2883e-02, PNorm = 155.4303, GNorm = 0.3572, lr_0 = 4.6957e-04
Loss = 1.2218e-02, PNorm = 155.4531, GNorm = 0.2364, lr_0 = 4.6924e-04
Loss = 1.0725e-02, PNorm = 155.4781, GNorm = 0.3645, lr_0 = 4.6892e-04
Loss = 1.1424e-02, PNorm = 155.5050, GNorm = 0.2986, lr_0 = 4.6860e-04
Loss = 1.2372e-02, PNorm = 155.5290, GNorm = 0.1586, lr_0 = 4.6828e-04
Loss = 1.0933e-02, PNorm = 155.5532, GNorm = 0.1275, lr_0 = 4.6796e-04
Loss = 1.4297e-02, PNorm = 155.5783, GNorm = 0.3229, lr_0 = 4.6764e-04
Loss = 1.1846e-02, PNorm = 155.6064, GNorm = 0.5975, lr_0 = 4.6732e-04
Loss = 1.0173e-02, PNorm = 155.6353, GNorm = 0.2230, lr_0 = 4.6700e-04
Loss = 1.1947e-02, PNorm = 155.6586, GNorm = 0.3373, lr_0 = 4.6668e-04
Loss = 1.1872e-02, PNorm = 155.6827, GNorm = 0.2104, lr_0 = 4.6636e-04
Loss = 1.1007e-02, PNorm = 155.7050, GNorm = 0.3401, lr_0 = 4.6604e-04
Loss = 1.1320e-02, PNorm = 155.7309, GNorm = 0.5602, lr_0 = 4.6572e-04
Loss = 1.3872e-02, PNorm = 155.7549, GNorm = 0.3259, lr_0 = 4.6540e-04
Loss = 1.1530e-02, PNorm = 155.7799, GNorm = 0.7528, lr_0 = 4.6508e-04
Loss = 1.0416e-02, PNorm = 155.7990, GNorm = 0.4510, lr_0 = 4.6476e-04
Loss = 1.2409e-02, PNorm = 155.8252, GNorm = 0.2023, lr_0 = 4.6445e-04
Loss = 1.1648e-02, PNorm = 155.8540, GNorm = 0.3916, lr_0 = 4.6413e-04
Loss = 1.0452e-02, PNorm = 155.8800, GNorm = 0.3877, lr_0 = 4.6381e-04
Loss = 1.1388e-02, PNorm = 155.9013, GNorm = 0.3649, lr_0 = 4.6349e-04
Loss = 1.0192e-02, PNorm = 155.9216, GNorm = 0.1522, lr_0 = 4.6317e-04
Loss = 1.1514e-02, PNorm = 155.9428, GNorm = 0.3445, lr_0 = 4.6286e-04
Loss = 1.2082e-02, PNorm = 155.9698, GNorm = 0.3915, lr_0 = 4.6254e-04
Loss = 1.2096e-02, PNorm = 155.9966, GNorm = 0.2092, lr_0 = 4.6222e-04
Loss = 1.0912e-02, PNorm = 156.0261, GNorm = 0.2284, lr_0 = 4.6191e-04
Loss = 1.1691e-02, PNorm = 156.0528, GNorm = 0.1489, lr_0 = 4.6159e-04
Loss = 1.0793e-02, PNorm = 156.0795, GNorm = 0.2345, lr_0 = 4.6127e-04
Loss = 1.0794e-02, PNorm = 156.1065, GNorm = 0.1535, lr_0 = 4.6096e-04
Loss = 1.3093e-02, PNorm = 156.1344, GNorm = 0.2054, lr_0 = 4.6064e-04
Loss = 9.8036e-03, PNorm = 156.1626, GNorm = 0.1100, lr_0 = 4.6033e-04
Loss = 1.0666e-02, PNorm = 156.1843, GNorm = 0.4312, lr_0 = 4.6001e-04
Loss = 1.2729e-02, PNorm = 156.2084, GNorm = 0.3213, lr_0 = 4.5970e-04
Loss = 1.1359e-02, PNorm = 156.2346, GNorm = 0.4784, lr_0 = 4.5938e-04
Loss = 1.0833e-02, PNorm = 156.2633, GNorm = 0.7180, lr_0 = 4.5907e-04
Loss = 1.2143e-02, PNorm = 156.2876, GNorm = 0.6962, lr_0 = 4.5875e-04
Loss = 1.3557e-02, PNorm = 156.3218, GNorm = 0.5110, lr_0 = 4.5844e-04
Loss = 1.4234e-02, PNorm = 156.3504, GNorm = 0.1723, lr_0 = 4.5812e-04
Loss = 1.1233e-02, PNorm = 156.3789, GNorm = 0.2880, lr_0 = 4.5781e-04
Loss = 1.3473e-02, PNorm = 156.4045, GNorm = 0.1679, lr_0 = 4.5750e-04
Loss = 1.2982e-02, PNorm = 156.4349, GNorm = 0.2385, lr_0 = 4.5718e-04
Loss = 1.1570e-02, PNorm = 156.4648, GNorm = 0.4469, lr_0 = 4.5687e-04
Loss = 1.1396e-02, PNorm = 156.4921, GNorm = 0.4482, lr_0 = 4.5656e-04
Loss = 1.0769e-02, PNorm = 156.5181, GNorm = 0.3335, lr_0 = 4.5624e-04
Loss = 1.4484e-02, PNorm = 156.5464, GNorm = 0.3898, lr_0 = 4.5593e-04
Loss = 1.2672e-02, PNorm = 156.5766, GNorm = 0.4940, lr_0 = 4.5562e-04
Loss = 1.2390e-02, PNorm = 156.6055, GNorm = 0.2108, lr_0 = 4.5531e-04
Loss = 1.6267e-02, PNorm = 156.6298, GNorm = 0.4501, lr_0 = 4.5499e-04
Loss = 1.0240e-02, PNorm = 156.6565, GNorm = 0.2172, lr_0 = 4.5468e-04
Loss = 1.3493e-02, PNorm = 156.6858, GNorm = 0.4150, lr_0 = 4.5437e-04
Loss = 1.2164e-02, PNorm = 156.7133, GNorm = 0.4172, lr_0 = 4.5406e-04
Loss = 1.1241e-02, PNorm = 156.7396, GNorm = 0.3293, lr_0 = 4.5375e-04
Loss = 1.4935e-02, PNorm = 156.7643, GNorm = 0.5067, lr_0 = 4.5344e-04
Loss = 1.3709e-02, PNorm = 156.7883, GNorm = 0.3480, lr_0 = 4.5313e-04
Loss = 1.3094e-02, PNorm = 156.8140, GNorm = 0.5733, lr_0 = 4.5282e-04
Loss = 1.1418e-02, PNorm = 156.8422, GNorm = 0.1955, lr_0 = 4.5251e-04
Loss = 1.1678e-02, PNorm = 156.8707, GNorm = 0.4342, lr_0 = 4.5220e-04
Loss = 1.8792e-02, PNorm = 156.8996, GNorm = 0.7863, lr_0 = 4.5189e-04
Loss = 1.3819e-02, PNorm = 156.9265, GNorm = 0.4610, lr_0 = 4.5158e-04
Loss = 1.0372e-02, PNorm = 156.9521, GNorm = 0.3001, lr_0 = 4.5127e-04
Loss = 1.2272e-02, PNorm = 156.9790, GNorm = 0.4653, lr_0 = 4.5096e-04
Loss = 1.1373e-02, PNorm = 157.0090, GNorm = 0.4068, lr_0 = 4.5065e-04
Loss = 1.0472e-02, PNorm = 157.0300, GNorm = 0.2707, lr_0 = 4.5034e-04
Loss = 9.6902e-03, PNorm = 157.0589, GNorm = 0.2469, lr_0 = 4.5003e-04
Loss = 1.3916e-02, PNorm = 157.0855, GNorm = 0.5384, lr_0 = 4.4972e-04
Loss = 1.5595e-02, PNorm = 157.1163, GNorm = 0.1862, lr_0 = 4.4942e-04
Loss = 1.3004e-02, PNorm = 157.1416, GNorm = 0.2272, lr_0 = 4.4911e-04
Loss = 1.3120e-02, PNorm = 157.1682, GNorm = 0.6372, lr_0 = 4.4880e-04
Loss = 1.2262e-02, PNorm = 157.1929, GNorm = 0.6360, lr_0 = 4.4849e-04
Loss = 1.1272e-02, PNorm = 157.2247, GNorm = 0.2364, lr_0 = 4.4819e-04
Loss = 1.4623e-02, PNorm = 157.2523, GNorm = 0.2464, lr_0 = 4.4788e-04
Loss = 1.3281e-02, PNorm = 157.2786, GNorm = 0.3950, lr_0 = 4.4757e-04
Loss = 1.0997e-02, PNorm = 157.3088, GNorm = 0.3396, lr_0 = 4.4727e-04
Loss = 9.5661e-03, PNorm = 157.3403, GNorm = 0.3601, lr_0 = 4.4696e-04
Loss = 1.2312e-02, PNorm = 157.3691, GNorm = 0.5481, lr_0 = 4.4665e-04
Loss = 1.0270e-02, PNorm = 157.3986, GNorm = 0.3414, lr_0 = 4.4635e-04
Loss = 1.1270e-02, PNorm = 157.4237, GNorm = 0.1135, lr_0 = 4.4604e-04
Loss = 1.1321e-02, PNorm = 157.4489, GNorm = 0.4695, lr_0 = 4.4574e-04
Loss = 1.2648e-02, PNorm = 157.4753, GNorm = 0.4036, lr_0 = 4.4543e-04
Loss = 1.1102e-02, PNorm = 157.5063, GNorm = 0.1964, lr_0 = 4.4513e-04
Loss = 1.2995e-02, PNorm = 157.5343, GNorm = 0.2079, lr_0 = 4.4482e-04
Loss = 1.6511e-02, PNorm = 157.5601, GNorm = 0.4023, lr_0 = 4.4452e-04
Loss = 1.2552e-02, PNorm = 157.5876, GNorm = 0.4639, lr_0 = 4.4421e-04
Loss = 1.1820e-02, PNorm = 157.6142, GNorm = 0.2010, lr_0 = 4.4391e-04
Loss = 1.1657e-02, PNorm = 157.6444, GNorm = 0.2207, lr_0 = 4.4360e-04
Loss = 1.1022e-02, PNorm = 157.6750, GNorm = 0.2687, lr_0 = 4.4330e-04
Loss = 1.3167e-02, PNorm = 157.7058, GNorm = 0.4922, lr_0 = 4.4299e-04
Loss = 1.2910e-02, PNorm = 157.7402, GNorm = 0.2096, lr_0 = 4.4269e-04
Loss = 1.4290e-02, PNorm = 157.7759, GNorm = 0.4549, lr_0 = 4.4239e-04
Loss = 1.0476e-02, PNorm = 157.8156, GNorm = 0.4153, lr_0 = 4.4209e-04
Loss = 1.1967e-02, PNorm = 157.8468, GNorm = 0.1425, lr_0 = 4.4178e-04
Loss = 1.3138e-02, PNorm = 157.8783, GNorm = 0.5265, lr_0 = 4.4148e-04
Loss = 1.2555e-02, PNorm = 157.9101, GNorm = 0.2349, lr_0 = 4.4118e-04
Loss = 1.2119e-02, PNorm = 157.9434, GNorm = 0.4287, lr_0 = 4.4088e-04
Loss = 1.2011e-02, PNorm = 157.9730, GNorm = 0.2358, lr_0 = 4.4057e-04
Loss = 1.0020e-02, PNorm = 157.9993, GNorm = 0.4526, lr_0 = 4.4027e-04
Loss = 1.2256e-02, PNorm = 158.0234, GNorm = 0.2472, lr_0 = 4.3997e-04
Loss = 1.1301e-02, PNorm = 158.0487, GNorm = 0.2585, lr_0 = 4.3967e-04
Loss = 1.4601e-02, PNorm = 158.0773, GNorm = 0.2956, lr_0 = 4.3937e-04
Validation mae = 0.280853
Epoch 12
Loss = 1.3233e-02, PNorm = 158.1031, GNorm = 0.4193, lr_0 = 4.3907e-04
Loss = 1.2126e-02, PNorm = 158.1260, GNorm = 0.6718, lr_0 = 4.3877e-04
Loss = 1.0670e-02, PNorm = 158.1418, GNorm = 0.1445, lr_0 = 4.3846e-04
Loss = 1.1004e-02, PNorm = 158.1608, GNorm = 0.1216, lr_0 = 4.3816e-04
Loss = 1.0930e-02, PNorm = 158.1838, GNorm = 0.4271, lr_0 = 4.3786e-04
Loss = 1.1359e-02, PNorm = 158.2083, GNorm = 0.2019, lr_0 = 4.3756e-04
Loss = 1.1922e-02, PNorm = 158.2291, GNorm = 0.5462, lr_0 = 4.3726e-04
Loss = 1.0509e-02, PNorm = 158.2505, GNorm = 0.3563, lr_0 = 4.3696e-04
Loss = 1.0039e-02, PNorm = 158.2690, GNorm = 0.2345, lr_0 = 4.3667e-04
Loss = 1.1043e-02, PNorm = 158.2867, GNorm = 0.2523, lr_0 = 4.3637e-04
Loss = 1.1476e-02, PNorm = 158.3051, GNorm = 0.1607, lr_0 = 4.3607e-04
Loss = 8.6190e-03, PNorm = 158.3308, GNorm = 0.4048, lr_0 = 4.3577e-04
Loss = 9.4093e-03, PNorm = 158.3541, GNorm = 0.4614, lr_0 = 4.3547e-04
Loss = 1.2439e-02, PNorm = 158.3748, GNorm = 0.1440, lr_0 = 4.3517e-04
Loss = 1.3921e-02, PNorm = 158.3900, GNorm = 0.3469, lr_0 = 4.3487e-04
Loss = 1.0423e-02, PNorm = 158.4126, GNorm = 0.1307, lr_0 = 4.3458e-04
Loss = 1.0082e-02, PNorm = 158.4325, GNorm = 0.4148, lr_0 = 4.3428e-04
Loss = 1.0414e-02, PNorm = 158.4507, GNorm = 0.1381, lr_0 = 4.3398e-04
Loss = 1.1039e-02, PNorm = 158.4707, GNorm = 0.2005, lr_0 = 4.3368e-04
Loss = 1.0952e-02, PNorm = 158.4947, GNorm = 0.1775, lr_0 = 4.3339e-04
Loss = 8.5984e-03, PNorm = 158.5167, GNorm = 0.1997, lr_0 = 4.3309e-04
Loss = 9.3549e-03, PNorm = 158.5344, GNorm = 0.1295, lr_0 = 4.3279e-04
Loss = 8.4710e-03, PNorm = 158.5534, GNorm = 0.1768, lr_0 = 4.3250e-04
Loss = 9.3331e-03, PNorm = 158.5763, GNorm = 0.6494, lr_0 = 4.3220e-04
Loss = 8.3232e-03, PNorm = 158.5977, GNorm = 0.1482, lr_0 = 4.3190e-04
Loss = 9.4295e-03, PNorm = 158.6163, GNorm = 0.4325, lr_0 = 4.3161e-04
Loss = 8.7167e-03, PNorm = 158.6343, GNorm = 0.1494, lr_0 = 4.3131e-04
Loss = 8.9614e-03, PNorm = 158.6528, GNorm = 0.2593, lr_0 = 4.3102e-04
Loss = 9.5184e-03, PNorm = 158.6686, GNorm = 0.0816, lr_0 = 4.3072e-04
Loss = 9.5533e-03, PNorm = 158.6837, GNorm = 0.2207, lr_0 = 4.3043e-04
Loss = 9.2683e-03, PNorm = 158.7004, GNorm = 0.3000, lr_0 = 4.3013e-04
Loss = 8.6374e-03, PNorm = 158.7190, GNorm = 0.1901, lr_0 = 4.2984e-04
Loss = 1.2609e-02, PNorm = 158.7397, GNorm = 0.4359, lr_0 = 4.2954e-04
Loss = 9.5362e-03, PNorm = 158.7630, GNorm = 0.4539, lr_0 = 4.2925e-04
Loss = 1.0582e-02, PNorm = 158.7851, GNorm = 0.5802, lr_0 = 4.2895e-04
Loss = 9.0710e-03, PNorm = 158.8131, GNorm = 0.1361, lr_0 = 4.2866e-04
Loss = 9.6444e-03, PNorm = 158.8332, GNorm = 0.0962, lr_0 = 4.2837e-04
Loss = 1.2456e-02, PNorm = 158.8509, GNorm = 0.2926, lr_0 = 4.2807e-04
Loss = 8.6818e-03, PNorm = 158.8728, GNorm = 0.1911, lr_0 = 4.2778e-04
Loss = 8.7811e-03, PNorm = 158.8894, GNorm = 0.1009, lr_0 = 4.2749e-04
Loss = 8.8726e-03, PNorm = 158.9078, GNorm = 0.1044, lr_0 = 4.2719e-04
Loss = 9.1105e-03, PNorm = 158.9297, GNorm = 0.5167, lr_0 = 4.2690e-04
Loss = 1.1190e-02, PNorm = 158.9496, GNorm = 0.1092, lr_0 = 4.2661e-04
Loss = 9.8856e-03, PNorm = 158.9687, GNorm = 0.4511, lr_0 = 4.2632e-04
Loss = 1.0154e-02, PNorm = 158.9871, GNorm = 0.2824, lr_0 = 4.2602e-04
Loss = 1.0698e-02, PNorm = 159.0100, GNorm = 0.2905, lr_0 = 4.2573e-04
Loss = 1.0593e-02, PNorm = 159.0306, GNorm = 0.1762, lr_0 = 4.2544e-04
Loss = 9.9916e-03, PNorm = 159.0496, GNorm = 0.3028, lr_0 = 4.2515e-04
Loss = 9.8817e-03, PNorm = 159.0724, GNorm = 0.2011, lr_0 = 4.2486e-04
Loss = 9.0965e-03, PNorm = 159.0909, GNorm = 0.2450, lr_0 = 4.2457e-04
Loss = 1.0819e-02, PNorm = 159.1124, GNorm = 0.5385, lr_0 = 4.2428e-04
Loss = 8.7790e-03, PNorm = 159.1327, GNorm = 0.1613, lr_0 = 4.2399e-04
Loss = 1.0210e-02, PNorm = 159.1563, GNorm = 0.2267, lr_0 = 4.2370e-04
Loss = 1.2065e-02, PNorm = 159.1802, GNorm = 0.3933, lr_0 = 4.2340e-04
Loss = 1.0586e-02, PNorm = 159.2048, GNorm = 0.1231, lr_0 = 4.2311e-04
Loss = 9.2968e-03, PNorm = 159.2274, GNorm = 0.1306, lr_0 = 4.2283e-04
Loss = 7.7745e-03, PNorm = 159.2455, GNorm = 0.1251, lr_0 = 4.2254e-04
Loss = 8.4115e-03, PNorm = 159.2639, GNorm = 0.1130, lr_0 = 4.2225e-04
Loss = 1.0659e-02, PNorm = 159.2781, GNorm = 0.5065, lr_0 = 4.2196e-04
Loss = 8.4973e-03, PNorm = 159.2938, GNorm = 0.2474, lr_0 = 4.2167e-04
Loss = 9.1041e-03, PNorm = 159.3128, GNorm = 0.4831, lr_0 = 4.2138e-04
Loss = 9.2270e-03, PNorm = 159.3348, GNorm = 0.2619, lr_0 = 4.2109e-04
Loss = 1.0458e-02, PNorm = 159.3541, GNorm = 0.2855, lr_0 = 4.2080e-04
Loss = 9.7052e-03, PNorm = 159.3789, GNorm = 0.2276, lr_0 = 4.2051e-04
Loss = 1.1322e-02, PNorm = 159.4014, GNorm = 0.3682, lr_0 = 4.2023e-04
Loss = 8.6422e-03, PNorm = 159.4295, GNorm = 0.0975, lr_0 = 4.1994e-04
Loss = 1.3751e-02, PNorm = 159.4536, GNorm = 0.3724, lr_0 = 4.1965e-04
Loss = 9.1505e-03, PNorm = 159.4749, GNorm = 0.2844, lr_0 = 4.1936e-04
Loss = 9.9486e-03, PNorm = 159.4941, GNorm = 0.5260, lr_0 = 4.1907e-04
Loss = 7.7809e-03, PNorm = 159.5165, GNorm = 0.1238, lr_0 = 4.1879e-04
Loss = 9.9596e-03, PNorm = 159.5343, GNorm = 0.2353, lr_0 = 4.1850e-04
Loss = 9.9144e-03, PNorm = 159.5579, GNorm = 0.1769, lr_0 = 4.1821e-04
Loss = 9.6800e-03, PNorm = 159.5793, GNorm = 0.1581, lr_0 = 4.1793e-04
Loss = 1.0940e-02, PNorm = 159.6003, GNorm = 0.3354, lr_0 = 4.1764e-04
Loss = 1.3151e-02, PNorm = 159.6242, GNorm = 1.0472, lr_0 = 4.1736e-04
Loss = 9.4003e-03, PNorm = 159.6429, GNorm = 0.3536, lr_0 = 4.1707e-04
Loss = 1.3728e-02, PNorm = 159.6643, GNorm = 0.1137, lr_0 = 4.1678e-04
Loss = 9.0806e-03, PNorm = 159.6878, GNorm = 0.1312, lr_0 = 4.1650e-04
Loss = 1.1835e-02, PNorm = 159.7129, GNorm = 0.6578, lr_0 = 4.1621e-04
Loss = 1.2907e-02, PNorm = 159.7376, GNorm = 0.3227, lr_0 = 4.1593e-04
Loss = 1.1982e-02, PNorm = 159.7649, GNorm = 0.4088, lr_0 = 4.1564e-04
Loss = 1.6288e-02, PNorm = 159.7914, GNorm = 0.1643, lr_0 = 4.1536e-04
Loss = 8.4970e-03, PNorm = 159.8151, GNorm = 0.1920, lr_0 = 4.1507e-04
Loss = 1.0495e-02, PNorm = 159.8436, GNorm = 0.4032, lr_0 = 4.1479e-04
Loss = 9.9299e-03, PNorm = 159.8669, GNorm = 0.1165, lr_0 = 4.1450e-04
Loss = 9.8826e-03, PNorm = 159.8931, GNorm = 0.5792, lr_0 = 4.1422e-04
Loss = 9.1972e-03, PNorm = 159.9181, GNorm = 0.1604, lr_0 = 4.1394e-04
Loss = 9.8059e-03, PNorm = 159.9422, GNorm = 0.2604, lr_0 = 4.1365e-04
Loss = 1.1429e-02, PNorm = 159.9647, GNorm = 0.1322, lr_0 = 4.1337e-04
Loss = 9.8966e-03, PNorm = 159.9876, GNorm = 0.2118, lr_0 = 4.1309e-04
Loss = 8.5856e-03, PNorm = 160.0123, GNorm = 0.1152, lr_0 = 4.1280e-04
Loss = 9.5749e-03, PNorm = 160.0356, GNorm = 0.2671, lr_0 = 4.1252e-04
Loss = 1.1306e-02, PNorm = 160.0605, GNorm = 0.5298, lr_0 = 4.1224e-04
Loss = 1.0109e-02, PNorm = 160.0841, GNorm = 0.3400, lr_0 = 4.1196e-04
Loss = 1.1647e-02, PNorm = 160.1097, GNorm = 0.7140, lr_0 = 4.1167e-04
Loss = 1.1216e-02, PNorm = 160.1317, GNorm = 0.3414, lr_0 = 4.1139e-04
Loss = 8.9398e-03, PNorm = 160.1539, GNorm = 0.0836, lr_0 = 4.1111e-04
Loss = 1.0371e-02, PNorm = 160.1771, GNorm = 0.2674, lr_0 = 4.1083e-04
Loss = 8.8780e-03, PNorm = 160.1986, GNorm = 0.3788, lr_0 = 4.1055e-04
Loss = 8.6049e-03, PNorm = 160.2194, GNorm = 0.3969, lr_0 = 4.1027e-04
Loss = 1.0586e-02, PNorm = 160.2463, GNorm = 0.2367, lr_0 = 4.0998e-04
Loss = 1.0349e-02, PNorm = 160.2712, GNorm = 0.2581, lr_0 = 4.0970e-04
Loss = 1.1896e-02, PNorm = 160.3006, GNorm = 0.1510, lr_0 = 4.0942e-04
Loss = 8.7222e-03, PNorm = 160.3244, GNorm = 0.4071, lr_0 = 4.0914e-04
Loss = 9.1109e-03, PNorm = 160.3488, GNorm = 0.3706, lr_0 = 4.0886e-04
Loss = 1.0553e-02, PNorm = 160.3705, GNorm = 0.3655, lr_0 = 4.0858e-04
Loss = 9.2862e-03, PNorm = 160.3915, GNorm = 0.1344, lr_0 = 4.0830e-04
Loss = 8.1239e-03, PNorm = 160.4126, GNorm = 0.6421, lr_0 = 4.0802e-04
Loss = 9.1412e-03, PNorm = 160.4328, GNorm = 0.1250, lr_0 = 4.0774e-04
Loss = 1.1960e-02, PNorm = 160.4607, GNorm = 0.1713, lr_0 = 4.0746e-04
Loss = 1.6479e-02, PNorm = 160.4840, GNorm = 0.2518, lr_0 = 4.0718e-04
Loss = 8.7074e-03, PNorm = 160.5091, GNorm = 0.1441, lr_0 = 4.0691e-04
Loss = 9.3149e-03, PNorm = 160.5306, GNorm = 0.4613, lr_0 = 4.0663e-04
Loss = 1.2412e-02, PNorm = 160.5516, GNorm = 0.1926, lr_0 = 4.0635e-04
Loss = 8.9156e-03, PNorm = 160.5710, GNorm = 0.3352, lr_0 = 4.0607e-04
Loss = 1.0153e-02, PNorm = 160.5946, GNorm = 0.5079, lr_0 = 4.0579e-04
Loss = 1.2125e-02, PNorm = 160.6171, GNorm = 0.5937, lr_0 = 4.0551e-04
Loss = 9.9293e-03, PNorm = 160.6408, GNorm = 0.1685, lr_0 = 4.0524e-04
Loss = 1.0672e-02, PNorm = 160.6640, GNorm = 0.1794, lr_0 = 4.0496e-04
Loss = 8.9553e-03, PNorm = 160.6903, GNorm = 0.3212, lr_0 = 4.0468e-04
Validation mae = 0.280251
Epoch 13
Loss = 8.7133e-03, PNorm = 160.7090, GNorm = 0.3750, lr_0 = 4.0440e-04
Loss = 1.1360e-02, PNorm = 160.7263, GNorm = 0.3085, lr_0 = 4.0413e-04
Loss = 7.6886e-03, PNorm = 160.7421, GNorm = 0.3126, lr_0 = 4.0385e-04
Loss = 7.8080e-03, PNorm = 160.7592, GNorm = 0.1610, lr_0 = 4.0357e-04
Loss = 7.8228e-03, PNorm = 160.7758, GNorm = 0.2549, lr_0 = 4.0330e-04
Loss = 8.9609e-03, PNorm = 160.7890, GNorm = 0.4623, lr_0 = 4.0302e-04
Loss = 8.7256e-03, PNorm = 160.8060, GNorm = 0.2626, lr_0 = 4.0274e-04
Loss = 8.7476e-03, PNorm = 160.8256, GNorm = 0.1966, lr_0 = 4.0247e-04
Loss = 7.9157e-03, PNorm = 160.8425, GNorm = 0.4871, lr_0 = 4.0219e-04
Loss = 1.0652e-02, PNorm = 160.8569, GNorm = 0.3465, lr_0 = 4.0192e-04
Loss = 8.1821e-03, PNorm = 160.8719, GNorm = 0.2207, lr_0 = 4.0164e-04
Loss = 7.0846e-03, PNorm = 160.8903, GNorm = 0.1886, lr_0 = 4.0137e-04
Loss = 8.6214e-03, PNorm = 160.9045, GNorm = 0.1886, lr_0 = 4.0109e-04
Loss = 9.4086e-03, PNorm = 160.9236, GNorm = 0.2054, lr_0 = 4.0082e-04
Loss = 1.0157e-02, PNorm = 160.9373, GNorm = 0.3867, lr_0 = 4.0054e-04
Loss = 1.1075e-02, PNorm = 160.9553, GNorm = 0.1057, lr_0 = 4.0027e-04
Loss = 7.9570e-03, PNorm = 160.9757, GNorm = 0.4380, lr_0 = 3.9999e-04
Loss = 8.2459e-03, PNorm = 160.9954, GNorm = 0.0875, lr_0 = 3.9972e-04
Loss = 9.9488e-03, PNorm = 161.0129, GNorm = 0.3299, lr_0 = 3.9945e-04
Loss = 8.3171e-03, PNorm = 161.0293, GNorm = 0.1648, lr_0 = 3.9917e-04
Loss = 8.9198e-03, PNorm = 161.0468, GNorm = 0.2230, lr_0 = 3.9890e-04
Loss = 9.0743e-03, PNorm = 161.0590, GNorm = 0.1784, lr_0 = 3.9863e-04
Loss = 8.0551e-03, PNorm = 161.0770, GNorm = 0.1323, lr_0 = 3.9835e-04
Loss = 8.9290e-03, PNorm = 161.0953, GNorm = 0.3014, lr_0 = 3.9808e-04
Loss = 8.4991e-03, PNorm = 161.1145, GNorm = 0.2629, lr_0 = 3.9781e-04
Loss = 8.1525e-03, PNorm = 161.1321, GNorm = 0.4996, lr_0 = 3.9753e-04
Loss = 7.4725e-03, PNorm = 161.1468, GNorm = 0.3685, lr_0 = 3.9726e-04
Loss = 8.9636e-03, PNorm = 161.1618, GNorm = 0.1345, lr_0 = 3.9699e-04
Loss = 7.3859e-03, PNorm = 161.1771, GNorm = 0.3420, lr_0 = 3.9672e-04
Loss = 8.6060e-03, PNorm = 161.1942, GNorm = 0.1089, lr_0 = 3.9645e-04
Loss = 7.6199e-03, PNorm = 161.2070, GNorm = 0.1532, lr_0 = 3.9617e-04
Loss = 8.0207e-03, PNorm = 161.2253, GNorm = 0.2555, lr_0 = 3.9590e-04
Loss = 7.3010e-03, PNorm = 161.2420, GNorm = 0.2889, lr_0 = 3.9563e-04
Loss = 7.0293e-03, PNorm = 161.2580, GNorm = 0.0906, lr_0 = 3.9536e-04
Loss = 8.8581e-03, PNorm = 161.2751, GNorm = 0.2159, lr_0 = 3.9509e-04
Loss = 8.0501e-03, PNorm = 161.2929, GNorm = 0.1104, lr_0 = 3.9482e-04
Loss = 8.8995e-03, PNorm = 161.3097, GNorm = 0.1767, lr_0 = 3.9455e-04
Loss = 6.7298e-03, PNorm = 161.3289, GNorm = 0.1646, lr_0 = 3.9428e-04
Loss = 8.3309e-03, PNorm = 161.3460, GNorm = 0.3075, lr_0 = 3.9401e-04
Loss = 9.6644e-03, PNorm = 161.3654, GNorm = 0.4591, lr_0 = 3.9374e-04
Loss = 7.4027e-03, PNorm = 161.3843, GNorm = 0.1281, lr_0 = 3.9347e-04
Loss = 7.3869e-03, PNorm = 161.4013, GNorm = 0.5929, lr_0 = 3.9320e-04
Loss = 9.9877e-03, PNorm = 161.4142, GNorm = 0.4473, lr_0 = 3.9293e-04
Loss = 8.3507e-03, PNorm = 161.4292, GNorm = 0.4854, lr_0 = 3.9266e-04
Loss = 7.8666e-03, PNorm = 161.4458, GNorm = 0.1593, lr_0 = 3.9239e-04
Loss = 8.4909e-03, PNorm = 161.4654, GNorm = 0.6446, lr_0 = 3.9212e-04
Loss = 7.2932e-03, PNorm = 161.4825, GNorm = 0.2001, lr_0 = 3.9185e-04
Loss = 7.1517e-03, PNorm = 161.4971, GNorm = 0.3740, lr_0 = 3.9159e-04
Loss = 7.8187e-03, PNorm = 161.5126, GNorm = 0.3041, lr_0 = 3.9132e-04
Loss = 7.2010e-03, PNorm = 161.5274, GNorm = 0.1752, lr_0 = 3.9105e-04
Loss = 8.0628e-03, PNorm = 161.5459, GNorm = 0.4364, lr_0 = 3.9078e-04
Loss = 7.4502e-03, PNorm = 161.5634, GNorm = 0.2398, lr_0 = 3.9051e-04
Loss = 9.4633e-03, PNorm = 161.5816, GNorm = 0.3768, lr_0 = 3.9025e-04
Loss = 8.5281e-03, PNorm = 161.6009, GNorm = 0.1001, lr_0 = 3.8998e-04
Loss = 8.7186e-03, PNorm = 161.6174, GNorm = 0.7185, lr_0 = 3.8971e-04
Loss = 9.0572e-03, PNorm = 161.6374, GNorm = 0.1512, lr_0 = 3.8945e-04
Loss = 9.9754e-03, PNorm = 161.6581, GNorm = 0.4843, lr_0 = 3.8918e-04
Loss = 1.0644e-02, PNorm = 161.6760, GNorm = 0.2336, lr_0 = 3.8891e-04
Loss = 6.6037e-03, PNorm = 161.6952, GNorm = 0.2052, lr_0 = 3.8865e-04
Loss = 7.8764e-03, PNorm = 161.7140, GNorm = 0.2996, lr_0 = 3.8838e-04
Loss = 8.2735e-03, PNorm = 161.7311, GNorm = 0.3044, lr_0 = 3.8811e-04
Loss = 7.9936e-03, PNorm = 161.7452, GNorm = 0.1530, lr_0 = 3.8785e-04
Loss = 9.5899e-03, PNorm = 161.7616, GNorm = 0.1746, lr_0 = 3.8758e-04
Loss = 1.1069e-02, PNorm = 161.7793, GNorm = 0.1798, lr_0 = 3.8732e-04
Loss = 7.5823e-03, PNorm = 161.7977, GNorm = 0.2312, lr_0 = 3.8705e-04
Loss = 7.4252e-03, PNorm = 161.8148, GNorm = 0.2201, lr_0 = 3.8679e-04
Loss = 1.0204e-02, PNorm = 161.8331, GNorm = 0.3941, lr_0 = 3.8652e-04
Loss = 7.9398e-03, PNorm = 161.8508, GNorm = 0.3243, lr_0 = 3.8626e-04
Loss = 8.5809e-03, PNorm = 161.8720, GNorm = 0.1929, lr_0 = 3.8599e-04
Loss = 1.0583e-02, PNorm = 161.8954, GNorm = 0.3386, lr_0 = 3.8573e-04
Loss = 6.6139e-03, PNorm = 161.9163, GNorm = 0.1807, lr_0 = 3.8546e-04
Loss = 7.7023e-03, PNorm = 161.9356, GNorm = 0.2370, lr_0 = 3.8520e-04
Loss = 8.8823e-03, PNorm = 161.9520, GNorm = 0.1789, lr_0 = 3.8493e-04
Loss = 9.9229e-03, PNorm = 161.9721, GNorm = 0.2737, lr_0 = 3.8467e-04
Loss = 8.0351e-03, PNorm = 161.9907, GNorm = 0.1889, lr_0 = 3.8441e-04
Loss = 8.2839e-03, PNorm = 162.0096, GNorm = 0.0976, lr_0 = 3.8414e-04
Loss = 9.5957e-03, PNorm = 162.0295, GNorm = 0.2196, lr_0 = 3.8388e-04
Loss = 1.1859e-02, PNorm = 162.0482, GNorm = 0.2837, lr_0 = 3.8362e-04
Loss = 8.0209e-03, PNorm = 162.0641, GNorm = 0.2184, lr_0 = 3.8336e-04
Loss = 8.0791e-03, PNorm = 162.0789, GNorm = 0.1616, lr_0 = 3.8309e-04
Loss = 9.6641e-03, PNorm = 162.0944, GNorm = 0.2366, lr_0 = 3.8283e-04
Loss = 7.4550e-03, PNorm = 162.1101, GNorm = 0.3350, lr_0 = 3.8257e-04
Loss = 8.8850e-03, PNorm = 162.1276, GNorm = 0.1147, lr_0 = 3.8231e-04
Loss = 9.5130e-03, PNorm = 162.1450, GNorm = 0.1596, lr_0 = 3.8204e-04
Loss = 9.2589e-03, PNorm = 162.1618, GNorm = 0.2313, lr_0 = 3.8178e-04
Loss = 8.5447e-03, PNorm = 162.1799, GNorm = 0.2217, lr_0 = 3.8152e-04
Loss = 6.4728e-03, PNorm = 162.1984, GNorm = 0.1096, lr_0 = 3.8126e-04
Loss = 8.9364e-03, PNorm = 162.2156, GNorm = 0.2333, lr_0 = 3.8100e-04
Loss = 8.3655e-03, PNorm = 162.2330, GNorm = 0.3423, lr_0 = 3.8074e-04
Loss = 8.5662e-03, PNorm = 162.2536, GNorm = 0.3826, lr_0 = 3.8048e-04
Loss = 1.1685e-02, PNorm = 162.2777, GNorm = 0.1432, lr_0 = 3.8022e-04
Loss = 9.2234e-03, PNorm = 162.2975, GNorm = 0.4120, lr_0 = 3.7995e-04
Loss = 8.1615e-03, PNorm = 162.3125, GNorm = 0.3796, lr_0 = 3.7969e-04
Loss = 7.7728e-03, PNorm = 162.3256, GNorm = 0.2932, lr_0 = 3.7943e-04
Loss = 8.6643e-03, PNorm = 162.3420, GNorm = 0.3287, lr_0 = 3.7917e-04
Loss = 1.0212e-02, PNorm = 162.3640, GNorm = 0.1037, lr_0 = 3.7891e-04
Loss = 7.4249e-03, PNorm = 162.3855, GNorm = 0.1122, lr_0 = 3.7866e-04
Loss = 7.4603e-03, PNorm = 162.4046, GNorm = 0.3190, lr_0 = 3.7840e-04
Loss = 7.1599e-03, PNorm = 162.4203, GNorm = 0.1105, lr_0 = 3.7814e-04
Loss = 9.6462e-03, PNorm = 162.4374, GNorm = 0.2960, lr_0 = 3.7788e-04
Loss = 7.5763e-03, PNorm = 162.4574, GNorm = 0.0979, lr_0 = 3.7762e-04
Loss = 8.3975e-03, PNorm = 162.4757, GNorm = 0.3890, lr_0 = 3.7736e-04
Loss = 7.8978e-03, PNorm = 162.4949, GNorm = 0.4749, lr_0 = 3.7710e-04
Loss = 8.3503e-03, PNorm = 162.5152, GNorm = 0.1708, lr_0 = 3.7684e-04
Loss = 8.3621e-03, PNorm = 162.5370, GNorm = 0.1031, lr_0 = 3.7659e-04
Loss = 6.9337e-03, PNorm = 162.5587, GNorm = 0.5222, lr_0 = 3.7633e-04
Loss = 1.2770e-02, PNorm = 162.5771, GNorm = 0.4626, lr_0 = 3.7607e-04
Loss = 8.2590e-03, PNorm = 162.5969, GNorm = 0.1183, lr_0 = 3.7581e-04
Loss = 5.7520e-03, PNorm = 162.6196, GNorm = 0.1273, lr_0 = 3.7555e-04
Loss = 9.3989e-03, PNorm = 162.6405, GNorm = 0.4682, lr_0 = 3.7530e-04
Loss = 8.4869e-03, PNorm = 162.6625, GNorm = 0.1724, lr_0 = 3.7504e-04
Loss = 9.1420e-03, PNorm = 162.6835, GNorm = 0.2377, lr_0 = 3.7478e-04
Loss = 9.4115e-03, PNorm = 162.7068, GNorm = 0.2379, lr_0 = 3.7453e-04
Loss = 8.3398e-03, PNorm = 162.7284, GNorm = 0.3123, lr_0 = 3.7427e-04
Loss = 1.0054e-02, PNorm = 162.7500, GNorm = 0.1241, lr_0 = 3.7401e-04
Loss = 8.1583e-03, PNorm = 162.7715, GNorm = 0.3609, lr_0 = 3.7376e-04
Loss = 7.7212e-03, PNorm = 162.7877, GNorm = 0.1790, lr_0 = 3.7350e-04
Loss = 8.9640e-03, PNorm = 162.8081, GNorm = 0.2793, lr_0 = 3.7325e-04
Loss = 8.8804e-03, PNorm = 162.8261, GNorm = 0.4877, lr_0 = 3.7299e-04
Loss = 8.1200e-03, PNorm = 162.8470, GNorm = 0.1773, lr_0 = 3.7273e-04
Validation mae = 0.279703
Epoch 14
Loss = 6.6842e-03, PNorm = 162.8625, GNorm = 0.0896, lr_0 = 3.7248e-04
Loss = 8.0605e-03, PNorm = 162.8793, GNorm = 0.2442, lr_0 = 3.7222e-04
Loss = 7.8558e-03, PNorm = 162.8917, GNorm = 0.2538, lr_0 = 3.7197e-04
Loss = 7.3844e-03, PNorm = 162.9070, GNorm = 0.1945, lr_0 = 3.7171e-04
Loss = 8.0724e-03, PNorm = 162.9198, GNorm = 0.1726, lr_0 = 3.7146e-04
Loss = 7.7014e-03, PNorm = 162.9344, GNorm = 0.2682, lr_0 = 3.7120e-04
Loss = 7.2537e-03, PNorm = 162.9452, GNorm = 0.3139, lr_0 = 3.7095e-04
Loss = 6.9011e-03, PNorm = 162.9594, GNorm = 0.1521, lr_0 = 3.7070e-04
Loss = 6.5287e-03, PNorm = 162.9757, GNorm = 0.3869, lr_0 = 3.7044e-04
Loss = 7.3495e-03, PNorm = 162.9891, GNorm = 0.1462, lr_0 = 3.7019e-04
Loss = 6.7347e-03, PNorm = 163.0000, GNorm = 0.2573, lr_0 = 3.6993e-04
Loss = 7.5192e-03, PNorm = 163.0134, GNorm = 0.4027, lr_0 = 3.6968e-04
Loss = 8.8447e-03, PNorm = 163.0269, GNorm = 0.1702, lr_0 = 3.6943e-04
Loss = 7.1331e-03, PNorm = 163.0420, GNorm = 0.2959, lr_0 = 3.6917e-04
Loss = 1.0838e-02, PNorm = 163.0559, GNorm = 0.1236, lr_0 = 3.6892e-04
Loss = 7.3601e-03, PNorm = 163.0706, GNorm = 0.1703, lr_0 = 3.6867e-04
Loss = 7.9414e-03, PNorm = 163.0851, GNorm = 0.0971, lr_0 = 3.6842e-04
Loss = 8.5699e-03, PNorm = 163.1001, GNorm = 0.1778, lr_0 = 3.6816e-04
Loss = 8.8067e-03, PNorm = 163.1114, GNorm = 0.5325, lr_0 = 3.6791e-04
Loss = 6.3918e-03, PNorm = 163.1262, GNorm = 0.1946, lr_0 = 3.6766e-04
Loss = 6.4589e-03, PNorm = 163.1408, GNorm = 0.2667, lr_0 = 3.6741e-04
Loss = 8.5661e-03, PNorm = 163.1538, GNorm = 0.1750, lr_0 = 3.6716e-04
Loss = 7.0088e-03, PNorm = 163.1676, GNorm = 0.1930, lr_0 = 3.6690e-04
Loss = 6.8046e-03, PNorm = 163.1816, GNorm = 0.1056, lr_0 = 3.6665e-04
Loss = 7.9220e-03, PNorm = 163.1979, GNorm = 0.3240, lr_0 = 3.6640e-04
Loss = 7.2315e-03, PNorm = 163.2129, GNorm = 0.2008, lr_0 = 3.6615e-04
Loss = 6.8785e-03, PNorm = 163.2272, GNorm = 0.1710, lr_0 = 3.6590e-04
Loss = 6.2970e-03, PNorm = 163.2420, GNorm = 0.1442, lr_0 = 3.6565e-04
Loss = 6.5425e-03, PNorm = 163.2570, GNorm = 0.2787, lr_0 = 3.6540e-04
Loss = 7.6048e-03, PNorm = 163.2686, GNorm = 0.4069, lr_0 = 3.6515e-04
Loss = 7.0016e-03, PNorm = 163.2848, GNorm = 0.1422, lr_0 = 3.6490e-04
Loss = 6.0423e-03, PNorm = 163.2994, GNorm = 0.2290, lr_0 = 3.6465e-04
Loss = 8.5073e-03, PNorm = 163.3095, GNorm = 0.3125, lr_0 = 3.6440e-04
Loss = 7.7192e-03, PNorm = 163.3243, GNorm = 0.2312, lr_0 = 3.6415e-04
Loss = 8.8072e-03, PNorm = 163.3388, GNorm = 0.2938, lr_0 = 3.6390e-04
Loss = 6.0436e-03, PNorm = 163.3556, GNorm = 0.1615, lr_0 = 3.6365e-04
Loss = 7.8916e-03, PNorm = 163.3703, GNorm = 0.1227, lr_0 = 3.6340e-04
Loss = 8.2981e-03, PNorm = 163.3860, GNorm = 0.1690, lr_0 = 3.6315e-04
Loss = 7.1755e-03, PNorm = 163.4023, GNorm = 0.4519, lr_0 = 3.6290e-04
Loss = 7.3241e-03, PNorm = 163.4154, GNorm = 0.1360, lr_0 = 3.6266e-04
Loss = 6.5996e-03, PNorm = 163.4330, GNorm = 0.3373, lr_0 = 3.6241e-04
Loss = 7.3323e-03, PNorm = 163.4487, GNorm = 0.4062, lr_0 = 3.6216e-04
Loss = 6.6642e-03, PNorm = 163.4665, GNorm = 0.3754, lr_0 = 3.6191e-04
Loss = 8.3382e-03, PNorm = 163.4790, GNorm = 0.1852, lr_0 = 3.6166e-04
Loss = 6.6484e-03, PNorm = 163.4977, GNorm = 0.3188, lr_0 = 3.6141e-04
Loss = 6.4921e-03, PNorm = 163.5154, GNorm = 0.1613, lr_0 = 3.6117e-04
Loss = 7.2499e-03, PNorm = 163.5322, GNorm = 0.1082, lr_0 = 3.6092e-04
Loss = 6.5588e-03, PNorm = 163.5473, GNorm = 0.2682, lr_0 = 3.6067e-04
Loss = 8.1360e-03, PNorm = 163.5597, GNorm = 0.3914, lr_0 = 3.6043e-04
Loss = 9.0418e-03, PNorm = 163.5754, GNorm = 0.3306, lr_0 = 3.6018e-04
Loss = 8.0900e-03, PNorm = 163.5911, GNorm = 0.1786, lr_0 = 3.5993e-04
Loss = 6.1514e-03, PNorm = 163.6076, GNorm = 0.4363, lr_0 = 3.5969e-04
Loss = 7.5426e-03, PNorm = 163.6250, GNorm = 0.6027, lr_0 = 3.5944e-04
Loss = 7.1923e-03, PNorm = 163.6442, GNorm = 0.2933, lr_0 = 3.5919e-04
Loss = 9.9027e-03, PNorm = 163.6582, GNorm = 0.8521, lr_0 = 3.5895e-04
Loss = 8.8930e-03, PNorm = 163.6763, GNorm = 0.2707, lr_0 = 3.5870e-04
Loss = 7.4246e-03, PNorm = 163.6920, GNorm = 0.3736, lr_0 = 3.5845e-04
Loss = 6.8808e-03, PNorm = 163.7093, GNorm = 0.2220, lr_0 = 3.5821e-04
Loss = 6.5425e-03, PNorm = 163.7264, GNorm = 0.1335, lr_0 = 3.5796e-04
Loss = 8.2403e-03, PNorm = 163.7406, GNorm = 0.4283, lr_0 = 3.5772e-04
Loss = 6.3312e-03, PNorm = 163.7593, GNorm = 0.2412, lr_0 = 3.5747e-04
Loss = 6.9735e-03, PNorm = 163.7781, GNorm = 0.2661, lr_0 = 3.5723e-04
Loss = 7.4967e-03, PNorm = 163.7925, GNorm = 0.1796, lr_0 = 3.5698e-04
Loss = 8.2161e-03, PNorm = 163.8043, GNorm = 0.3831, lr_0 = 3.5674e-04
Loss = 5.7729e-03, PNorm = 163.8181, GNorm = 0.0905, lr_0 = 3.5650e-04
Loss = 6.0395e-03, PNorm = 163.8309, GNorm = 0.1996, lr_0 = 3.5625e-04
Loss = 6.7921e-03, PNorm = 163.8423, GNorm = 0.3589, lr_0 = 3.5601e-04
Loss = 7.5265e-03, PNorm = 163.8556, GNorm = 0.2103, lr_0 = 3.5576e-04
Loss = 5.3556e-03, PNorm = 163.8704, GNorm = 0.1737, lr_0 = 3.5552e-04
Loss = 7.2054e-03, PNorm = 163.8870, GNorm = 0.4920, lr_0 = 3.5528e-04
Loss = 8.4277e-03, PNorm = 163.9005, GNorm = 0.2034, lr_0 = 3.5503e-04
Loss = 5.9079e-03, PNorm = 163.9166, GNorm = 0.2122, lr_0 = 3.5479e-04
Loss = 5.7132e-03, PNorm = 163.9320, GNorm = 0.0982, lr_0 = 3.5455e-04
Loss = 7.6765e-03, PNorm = 163.9523, GNorm = 0.2560, lr_0 = 3.5430e-04
Loss = 6.4954e-03, PNorm = 163.9725, GNorm = 0.1337, lr_0 = 3.5406e-04
Loss = 6.6391e-03, PNorm = 163.9906, GNorm = 0.5331, lr_0 = 3.5382e-04
Loss = 6.9721e-03, PNorm = 164.0029, GNorm = 0.1549, lr_0 = 3.5358e-04
Loss = 7.1130e-03, PNorm = 164.0158, GNorm = 0.2691, lr_0 = 3.5333e-04
Loss = 6.7594e-03, PNorm = 164.0300, GNorm = 0.1123, lr_0 = 3.5309e-04
Loss = 8.6799e-03, PNorm = 164.0440, GNorm = 0.3386, lr_0 = 3.5285e-04
Loss = 7.2089e-03, PNorm = 164.0589, GNorm = 0.3671, lr_0 = 3.5261e-04
Loss = 7.3785e-03, PNorm = 164.0726, GNorm = 0.4223, lr_0 = 3.5237e-04
Loss = 8.0512e-03, PNorm = 164.0892, GNorm = 0.2366, lr_0 = 3.5212e-04
Loss = 6.0828e-03, PNorm = 164.1049, GNorm = 0.1624, lr_0 = 3.5188e-04
Loss = 6.7135e-03, PNorm = 164.1194, GNorm = 0.1635, lr_0 = 3.5164e-04
Loss = 5.6283e-03, PNorm = 164.1365, GNorm = 0.2843, lr_0 = 3.5140e-04
Loss = 8.2296e-03, PNorm = 164.1504, GNorm = 0.3959, lr_0 = 3.5116e-04
Loss = 7.2509e-03, PNorm = 164.1654, GNorm = 0.1447, lr_0 = 3.5092e-04
Loss = 6.0184e-03, PNorm = 164.1809, GNorm = 0.1162, lr_0 = 3.5068e-04
Loss = 7.4931e-03, PNorm = 164.1963, GNorm = 0.1816, lr_0 = 3.5044e-04
Loss = 7.3174e-03, PNorm = 164.2128, GNorm = 0.2754, lr_0 = 3.5020e-04
Loss = 5.9534e-03, PNorm = 164.2296, GNorm = 0.1746, lr_0 = 3.4996e-04
Loss = 5.7269e-03, PNorm = 164.2437, GNorm = 0.3104, lr_0 = 3.4972e-04
Loss = 5.6913e-03, PNorm = 164.2595, GNorm = 0.1296, lr_0 = 3.4948e-04
Loss = 1.0580e-02, PNorm = 164.2784, GNorm = 0.2507, lr_0 = 3.4924e-04
Loss = 8.4738e-03, PNorm = 164.2929, GNorm = 0.0907, lr_0 = 3.4900e-04
Loss = 8.2667e-03, PNorm = 164.3116, GNorm = 0.4382, lr_0 = 3.4876e-04
Loss = 7.0388e-03, PNorm = 164.3316, GNorm = 0.1645, lr_0 = 3.4852e-04
Loss = 5.4089e-03, PNorm = 164.3512, GNorm = 0.1058, lr_0 = 3.4828e-04
Loss = 6.9450e-03, PNorm = 164.3654, GNorm = 0.1406, lr_0 = 3.4805e-04
Loss = 6.8001e-03, PNorm = 164.3796, GNorm = 0.3238, lr_0 = 3.4781e-04
Loss = 6.3592e-03, PNorm = 164.3922, GNorm = 0.3161, lr_0 = 3.4757e-04
Loss = 6.7946e-03, PNorm = 164.4094, GNorm = 0.5898, lr_0 = 3.4733e-04
Loss = 5.9309e-03, PNorm = 164.4282, GNorm = 0.3222, lr_0 = 3.4709e-04
Loss = 6.0998e-03, PNorm = 164.4469, GNorm = 0.3360, lr_0 = 3.4686e-04
Loss = 6.5606e-03, PNorm = 164.4624, GNorm = 0.2733, lr_0 = 3.4662e-04
Loss = 5.5308e-03, PNorm = 164.4774, GNorm = 0.2811, lr_0 = 3.4638e-04
Loss = 5.9295e-03, PNorm = 164.4935, GNorm = 0.1877, lr_0 = 3.4614e-04
Loss = 9.0762e-03, PNorm = 164.5055, GNorm = 0.3269, lr_0 = 3.4591e-04
Loss = 9.4484e-03, PNorm = 164.5189, GNorm = 0.2299, lr_0 = 3.4567e-04
Loss = 7.0637e-03, PNorm = 164.5341, GNorm = 0.1089, lr_0 = 3.4543e-04
Loss = 7.0855e-03, PNorm = 164.5517, GNorm = 0.5076, lr_0 = 3.4520e-04
Loss = 5.6028e-03, PNorm = 164.5689, GNorm = 0.1348, lr_0 = 3.4496e-04
Loss = 6.5205e-03, PNorm = 164.5861, GNorm = 0.1334, lr_0 = 3.4472e-04
Loss = 7.6358e-03, PNorm = 164.6013, GNorm = 0.1038, lr_0 = 3.4449e-04
Loss = 6.9407e-03, PNorm = 164.6155, GNorm = 0.1738, lr_0 = 3.4425e-04
Loss = 7.4333e-03, PNorm = 164.6317, GNorm = 0.1104, lr_0 = 3.4402e-04
Loss = 6.1183e-03, PNorm = 164.6482, GNorm = 0.3116, lr_0 = 3.4378e-04
Loss = 6.0491e-03, PNorm = 164.6663, GNorm = 0.1271, lr_0 = 3.4354e-04
Loss = 1.0605e-02, PNorm = 164.6827, GNorm = 0.3337, lr_0 = 3.4331e-04
Validation mae = 0.279745
Epoch 15
Loss = 5.8975e-03, PNorm = 164.6911, GNorm = 0.1727, lr_0 = 3.4307e-04
Loss = 8.4354e-03, PNorm = 164.6992, GNorm = 0.5061, lr_0 = 3.4284e-04
Loss = 6.7682e-03, PNorm = 164.7100, GNorm = 0.2761, lr_0 = 3.4260e-04
Loss = 6.4391e-03, PNorm = 164.7209, GNorm = 0.1192, lr_0 = 3.4237e-04
Loss = 5.6118e-03, PNorm = 164.7332, GNorm = 0.4003, lr_0 = 3.4213e-04
Loss = 7.2382e-03, PNorm = 164.7451, GNorm = 0.3558, lr_0 = 3.4190e-04
Loss = 5.8633e-03, PNorm = 164.7568, GNorm = 0.2458, lr_0 = 3.4167e-04
Loss = 5.3255e-03, PNorm = 164.7713, GNorm = 0.3190, lr_0 = 3.4143e-04
Loss = 5.5646e-03, PNorm = 164.7814, GNorm = 0.4062, lr_0 = 3.4120e-04
Loss = 6.7030e-03, PNorm = 164.7908, GNorm = 0.4516, lr_0 = 3.4096e-04
Loss = 5.6402e-03, PNorm = 164.8020, GNorm = 0.1326, lr_0 = 3.4073e-04
Loss = 5.6634e-03, PNorm = 164.8145, GNorm = 0.1828, lr_0 = 3.4050e-04
Loss = 6.2167e-03, PNorm = 164.8260, GNorm = 0.3056, lr_0 = 3.4026e-04
Loss = 5.9542e-03, PNorm = 164.8385, GNorm = 0.2141, lr_0 = 3.4003e-04
Loss = 6.7699e-03, PNorm = 164.8481, GNorm = 0.2066, lr_0 = 3.3980e-04
Loss = 5.6240e-03, PNorm = 164.8563, GNorm = 0.3204, lr_0 = 3.3956e-04
Loss = 6.0287e-03, PNorm = 164.8675, GNorm = 0.1916, lr_0 = 3.3933e-04
Loss = 5.8922e-03, PNorm = 164.8791, GNorm = 0.1950, lr_0 = 3.3910e-04
Loss = 5.9228e-03, PNorm = 164.8927, GNorm = 0.3057, lr_0 = 3.3887e-04
Loss = 5.5318e-03, PNorm = 164.9056, GNorm = 0.1800, lr_0 = 3.3864e-04
Loss = 5.4569e-03, PNorm = 164.9153, GNorm = 0.3422, lr_0 = 3.3840e-04
Loss = 6.0629e-03, PNorm = 164.9224, GNorm = 0.1074, lr_0 = 3.3817e-04
Loss = 4.5471e-03, PNorm = 164.9321, GNorm = 0.1425, lr_0 = 3.3794e-04
Loss = 7.9931e-03, PNorm = 164.9443, GNorm = 0.3137, lr_0 = 3.3771e-04
Loss = 5.7976e-03, PNorm = 164.9578, GNorm = 0.0951, lr_0 = 3.3748e-04
Loss = 6.6641e-03, PNorm = 164.9745, GNorm = 0.2109, lr_0 = 3.3725e-04
Loss = 6.3091e-03, PNorm = 164.9890, GNorm = 0.4081, lr_0 = 3.3701e-04
Loss = 7.1338e-03, PNorm = 165.0019, GNorm = 0.2415, lr_0 = 3.3678e-04
Loss = 5.5606e-03, PNorm = 165.0108, GNorm = 0.3404, lr_0 = 3.3655e-04
Loss = 6.1703e-03, PNorm = 165.0210, GNorm = 0.3041, lr_0 = 3.3632e-04
Loss = 5.6142e-03, PNorm = 165.0312, GNorm = 0.2009, lr_0 = 3.3609e-04
Loss = 5.5296e-03, PNorm = 165.0426, GNorm = 0.3182, lr_0 = 3.3586e-04
Loss = 5.1552e-03, PNorm = 165.0514, GNorm = 0.0875, lr_0 = 3.3563e-04
Loss = 6.2762e-03, PNorm = 165.0617, GNorm = 0.3651, lr_0 = 3.3540e-04
Loss = 7.6963e-03, PNorm = 165.0725, GNorm = 0.0949, lr_0 = 3.3517e-04
Loss = 6.0124e-03, PNorm = 165.0846, GNorm = 0.2084, lr_0 = 3.3494e-04
Loss = 5.7369e-03, PNorm = 165.0995, GNorm = 0.2020, lr_0 = 3.3471e-04
Loss = 6.3044e-03, PNorm = 165.1133, GNorm = 0.2416, lr_0 = 3.3448e-04
Loss = 5.3070e-03, PNorm = 165.1261, GNorm = 0.2437, lr_0 = 3.3425e-04
Loss = 5.6758e-03, PNorm = 165.1335, GNorm = 0.3723, lr_0 = 3.3403e-04
Loss = 5.6960e-03, PNorm = 165.1450, GNorm = 0.4754, lr_0 = 3.3380e-04
Loss = 4.7525e-03, PNorm = 165.1565, GNorm = 0.3317, lr_0 = 3.3357e-04
Loss = 5.3522e-03, PNorm = 165.1701, GNorm = 0.2059, lr_0 = 3.3334e-04
Loss = 5.8259e-03, PNorm = 165.1820, GNorm = 0.5663, lr_0 = 3.3311e-04
Loss = 7.6886e-03, PNorm = 165.1944, GNorm = 0.0851, lr_0 = 3.3288e-04
Loss = 6.1790e-03, PNorm = 165.2054, GNorm = 0.2847, lr_0 = 3.3265e-04
Loss = 5.4322e-03, PNorm = 165.2192, GNorm = 0.2171, lr_0 = 3.3243e-04
Loss = 7.3400e-03, PNorm = 165.2291, GNorm = 0.1469, lr_0 = 3.3220e-04
Loss = 5.1734e-03, PNorm = 165.2412, GNorm = 0.0679, lr_0 = 3.3197e-04
Loss = 4.9867e-03, PNorm = 165.2538, GNorm = 0.4071, lr_0 = 3.3174e-04
Loss = 6.6347e-03, PNorm = 165.2669, GNorm = 0.1095, lr_0 = 3.3152e-04
Loss = 6.3550e-03, PNorm = 165.2796, GNorm = 0.2874, lr_0 = 3.3129e-04
Loss = 6.0832e-03, PNorm = 165.2925, GNorm = 0.0919, lr_0 = 3.3106e-04
Loss = 6.5763e-03, PNorm = 165.3048, GNorm = 0.1592, lr_0 = 3.3084e-04
Loss = 5.7400e-03, PNorm = 165.3190, GNorm = 0.1831, lr_0 = 3.3061e-04
Loss = 7.0861e-03, PNorm = 165.3343, GNorm = 0.3220, lr_0 = 3.3038e-04
Loss = 6.4345e-03, PNorm = 165.3477, GNorm = 0.1425, lr_0 = 3.3016e-04
Loss = 6.1770e-03, PNorm = 165.3611, GNorm = 0.2810, lr_0 = 3.2993e-04
Loss = 7.0789e-03, PNorm = 165.3721, GNorm = 0.3264, lr_0 = 3.2970e-04
Loss = 5.9085e-03, PNorm = 165.3827, GNorm = 0.1504, lr_0 = 3.2948e-04
Loss = 5.8818e-03, PNorm = 165.3916, GNorm = 0.2116, lr_0 = 3.2925e-04
Loss = 6.4483e-03, PNorm = 165.4061, GNorm = 0.0861, lr_0 = 3.2903e-04
Loss = 6.8252e-03, PNorm = 165.4176, GNorm = 0.1995, lr_0 = 3.2880e-04
Loss = 7.7418e-03, PNorm = 165.4287, GNorm = 0.6238, lr_0 = 3.2858e-04
Loss = 7.4546e-03, PNorm = 165.4387, GNorm = 0.1791, lr_0 = 3.2835e-04
Loss = 5.4949e-03, PNorm = 165.4533, GNorm = 0.1612, lr_0 = 3.2813e-04
Loss = 6.0286e-03, PNorm = 165.4689, GNorm = 0.2922, lr_0 = 3.2790e-04
Loss = 7.0621e-03, PNorm = 165.4846, GNorm = 0.3297, lr_0 = 3.2768e-04
Loss = 5.8932e-03, PNorm = 165.4981, GNorm = 0.0938, lr_0 = 3.2745e-04
Loss = 5.6877e-03, PNorm = 165.5113, GNorm = 0.1715, lr_0 = 3.2723e-04
Loss = 7.3344e-03, PNorm = 165.5235, GNorm = 0.4501, lr_0 = 3.2700e-04
Loss = 5.6996e-03, PNorm = 165.5379, GNorm = 0.2916, lr_0 = 3.2678e-04
Loss = 4.7723e-03, PNorm = 165.5526, GNorm = 0.1027, lr_0 = 3.2656e-04
Loss = 7.7537e-03, PNorm = 165.5657, GNorm = 0.1279, lr_0 = 3.2633e-04
Loss = 4.7177e-03, PNorm = 165.5759, GNorm = 0.2027, lr_0 = 3.2611e-04
Loss = 5.6602e-03, PNorm = 165.5889, GNorm = 0.1130, lr_0 = 3.2589e-04
Loss = 5.5357e-03, PNorm = 165.6012, GNorm = 0.1188, lr_0 = 3.2566e-04
Loss = 4.9124e-03, PNorm = 165.6125, GNorm = 0.2361, lr_0 = 3.2544e-04
Loss = 6.2721e-03, PNorm = 165.6252, GNorm = 0.1483, lr_0 = 3.2522e-04
Loss = 6.2851e-03, PNorm = 165.6354, GNorm = 0.1260, lr_0 = 3.2499e-04
Loss = 5.4422e-03, PNorm = 165.6504, GNorm = 0.2131, lr_0 = 3.2477e-04
Loss = 8.9471e-03, PNorm = 165.6641, GNorm = 0.1056, lr_0 = 3.2455e-04
Loss = 6.0808e-03, PNorm = 165.6796, GNorm = 0.4863, lr_0 = 3.2433e-04
Loss = 4.8145e-03, PNorm = 165.6963, GNorm = 0.1718, lr_0 = 3.2410e-04
Loss = 5.0696e-03, PNorm = 165.7110, GNorm = 0.1624, lr_0 = 3.2388e-04
Loss = 5.2005e-03, PNorm = 165.7251, GNorm = 0.1544, lr_0 = 3.2366e-04
Loss = 7.1682e-03, PNorm = 165.7381, GNorm = 0.2016, lr_0 = 3.2344e-04
Loss = 6.1696e-03, PNorm = 165.7509, GNorm = 0.3230, lr_0 = 3.2322e-04
Loss = 5.5839e-03, PNorm = 165.7638, GNorm = 0.0555, lr_0 = 3.2300e-04
Loss = 4.7333e-03, PNorm = 165.7751, GNorm = 0.1473, lr_0 = 3.2277e-04
Loss = 5.0843e-03, PNorm = 165.7864, GNorm = 0.1560, lr_0 = 3.2255e-04
Loss = 8.4171e-03, PNorm = 165.7979, GNorm = 0.8140, lr_0 = 3.2233e-04
Loss = 7.5009e-03, PNorm = 165.8112, GNorm = 0.2298, lr_0 = 3.2211e-04
Loss = 5.1765e-03, PNorm = 165.8218, GNorm = 0.4089, lr_0 = 3.2189e-04
Loss = 6.5098e-03, PNorm = 165.8329, GNorm = 0.2677, lr_0 = 3.2167e-04
Loss = 6.2147e-03, PNorm = 165.8439, GNorm = 0.1078, lr_0 = 3.2145e-04
Loss = 4.9921e-03, PNorm = 165.8528, GNorm = 0.1662, lr_0 = 3.2123e-04
Loss = 7.7294e-03, PNorm = 165.8675, GNorm = 0.2106, lr_0 = 3.2101e-04
Loss = 5.7954e-03, PNorm = 165.8849, GNorm = 0.2633, lr_0 = 3.2079e-04
Loss = 5.5720e-03, PNorm = 165.8996, GNorm = 0.0877, lr_0 = 3.2057e-04
Loss = 5.4461e-03, PNorm = 165.9133, GNorm = 0.0829, lr_0 = 3.2035e-04
Loss = 6.9460e-03, PNorm = 165.9262, GNorm = 0.2523, lr_0 = 3.2013e-04
Loss = 9.1567e-03, PNorm = 165.9413, GNorm = 0.2739, lr_0 = 3.1991e-04
Loss = 8.5132e-03, PNorm = 165.9532, GNorm = 0.1239, lr_0 = 3.1969e-04
Loss = 6.9031e-03, PNorm = 165.9672, GNorm = 0.4185, lr_0 = 3.1947e-04
Loss = 6.0922e-03, PNorm = 165.9799, GNorm = 0.2961, lr_0 = 3.1925e-04
Loss = 5.1968e-03, PNorm = 165.9944, GNorm = 0.1567, lr_0 = 3.1904e-04
Loss = 5.8842e-03, PNorm = 166.0062, GNorm = 0.5192, lr_0 = 3.1882e-04
Loss = 7.2688e-03, PNorm = 166.0190, GNorm = 0.0716, lr_0 = 3.1860e-04
Loss = 4.8447e-03, PNorm = 166.0277, GNorm = 0.4190, lr_0 = 3.1838e-04
Loss = 9.3868e-03, PNorm = 166.0423, GNorm = 0.4240, lr_0 = 3.1816e-04
Loss = 6.4498e-03, PNorm = 166.0531, GNorm = 0.2487, lr_0 = 3.1794e-04
Loss = 7.0732e-03, PNorm = 166.0684, GNorm = 0.1876, lr_0 = 3.1773e-04
Loss = 5.7926e-03, PNorm = 166.0830, GNorm = 0.1243, lr_0 = 3.1751e-04
Loss = 5.6026e-03, PNorm = 166.0960, GNorm = 0.7491, lr_0 = 3.1729e-04
Loss = 5.1486e-03, PNorm = 166.1059, GNorm = 0.1819, lr_0 = 3.1707e-04
Loss = 7.4818e-03, PNorm = 166.1216, GNorm = 0.3654, lr_0 = 3.1686e-04
Loss = 5.8255e-03, PNorm = 166.1348, GNorm = 0.1268, lr_0 = 3.1664e-04
Loss = 5.0200e-03, PNorm = 166.1450, GNorm = 0.2035, lr_0 = 3.1642e-04
Loss = 7.4218e-03, PNorm = 166.1591, GNorm = 0.0993, lr_0 = 3.1621e-04
Validation mae = 0.279704
Epoch 16
Loss = 5.2489e-03, PNorm = 166.1657, GNorm = 0.5060, lr_0 = 3.1599e-04
Loss = 5.8587e-03, PNorm = 166.1753, GNorm = 0.1953, lr_0 = 3.1577e-04
Loss = 5.1325e-03, PNorm = 166.1873, GNorm = 0.2815, lr_0 = 3.1556e-04
Loss = 5.3786e-03, PNorm = 166.1993, GNorm = 0.2178, lr_0 = 3.1534e-04
Loss = 4.9301e-03, PNorm = 166.2106, GNorm = 0.1496, lr_0 = 3.1512e-04
Loss = 5.0162e-03, PNorm = 166.2219, GNorm = 0.0901, lr_0 = 3.1491e-04
Loss = 6.0474e-03, PNorm = 166.2326, GNorm = 0.0843, lr_0 = 3.1469e-04
Loss = 4.3558e-03, PNorm = 166.2431, GNorm = 0.3268, lr_0 = 3.1448e-04
Loss = 5.6587e-03, PNorm = 166.2527, GNorm = 0.2363, lr_0 = 3.1426e-04
Loss = 9.6826e-03, PNorm = 166.2606, GNorm = 0.2958, lr_0 = 3.1405e-04
Loss = 5.3290e-03, PNorm = 166.2703, GNorm = 0.2981, lr_0 = 3.1383e-04
Loss = 4.8757e-03, PNorm = 166.2757, GNorm = 0.4112, lr_0 = 3.1362e-04
Loss = 6.2579e-03, PNorm = 166.2841, GNorm = 0.2877, lr_0 = 3.1340e-04
Loss = 4.6558e-03, PNorm = 166.2963, GNorm = 0.1519, lr_0 = 3.1319e-04
Loss = 5.1931e-03, PNorm = 166.3076, GNorm = 0.2661, lr_0 = 3.1297e-04
Loss = 4.8729e-03, PNorm = 166.3210, GNorm = 0.3893, lr_0 = 3.1276e-04
Loss = 6.1501e-03, PNorm = 166.3303, GNorm = 0.1411, lr_0 = 3.1254e-04
Loss = 4.7258e-03, PNorm = 166.3400, GNorm = 0.3257, lr_0 = 3.1233e-04
Loss = 4.6589e-03, PNorm = 166.3499, GNorm = 0.2564, lr_0 = 3.1212e-04
Loss = 5.1314e-03, PNorm = 166.3609, GNorm = 0.2847, lr_0 = 3.1190e-04
Loss = 4.9535e-03, PNorm = 166.3738, GNorm = 0.4336, lr_0 = 3.1169e-04
Loss = 6.6092e-03, PNorm = 166.3826, GNorm = 0.2321, lr_0 = 3.1147e-04
Loss = 4.4908e-03, PNorm = 166.3908, GNorm = 0.1323, lr_0 = 3.1126e-04
Loss = 5.8714e-03, PNorm = 166.3990, GNorm = 0.2119, lr_0 = 3.1105e-04
Loss = 4.3302e-03, PNorm = 166.4102, GNorm = 0.3238, lr_0 = 3.1083e-04
Loss = 5.4784e-03, PNorm = 166.4204, GNorm = 0.3144, lr_0 = 3.1062e-04
Loss = 5.7931e-03, PNorm = 166.4346, GNorm = 0.1269, lr_0 = 3.1041e-04
Loss = 4.4732e-03, PNorm = 166.4485, GNorm = 0.4460, lr_0 = 3.1020e-04
Loss = 4.1435e-03, PNorm = 166.4590, GNorm = 0.1153, lr_0 = 3.0998e-04
Loss = 5.1023e-03, PNorm = 166.4681, GNorm = 0.3100, lr_0 = 3.0977e-04
Loss = 6.2518e-03, PNorm = 166.4775, GNorm = 0.5264, lr_0 = 3.0956e-04
Loss = 4.8619e-03, PNorm = 166.4896, GNorm = 0.1123, lr_0 = 3.0935e-04
Loss = 4.6947e-03, PNorm = 166.5027, GNorm = 0.3522, lr_0 = 3.0914e-04
Loss = 4.9293e-03, PNorm = 166.5129, GNorm = 0.1193, lr_0 = 3.0892e-04
Loss = 4.6841e-03, PNorm = 166.5198, GNorm = 0.2792, lr_0 = 3.0871e-04
Loss = 8.1889e-03, PNorm = 166.5268, GNorm = 0.2012, lr_0 = 3.0850e-04
Loss = 4.6652e-03, PNorm = 166.5389, GNorm = 0.4526, lr_0 = 3.0829e-04
Loss = 5.5192e-03, PNorm = 166.5490, GNorm = 0.0743, lr_0 = 3.0808e-04
Loss = 5.2863e-03, PNorm = 166.5581, GNorm = 0.2012, lr_0 = 3.0787e-04
Loss = 5.3401e-03, PNorm = 166.5676, GNorm = 0.2591, lr_0 = 3.0766e-04
Loss = 4.9394e-03, PNorm = 166.5783, GNorm = 0.3220, lr_0 = 3.0745e-04
Loss = 4.6442e-03, PNorm = 166.5877, GNorm = 0.0939, lr_0 = 3.0723e-04
Loss = 4.6578e-03, PNorm = 166.5960, GNorm = 0.2414, lr_0 = 3.0702e-04
Loss = 4.5101e-03, PNorm = 166.6054, GNorm = 0.0779, lr_0 = 3.0681e-04
Loss = 4.9062e-03, PNorm = 166.6164, GNorm = 0.4009, lr_0 = 3.0660e-04
Loss = 4.4784e-03, PNorm = 166.6267, GNorm = 0.1095, lr_0 = 3.0639e-04
Loss = 7.8840e-03, PNorm = 166.6368, GNorm = 0.1448, lr_0 = 3.0618e-04
Loss = 4.9703e-03, PNorm = 166.6473, GNorm = 0.1772, lr_0 = 3.0597e-04
Loss = 7.1899e-03, PNorm = 166.6561, GNorm = 0.1351, lr_0 = 3.0576e-04
Loss = 5.0620e-03, PNorm = 166.6654, GNorm = 0.0966, lr_0 = 3.0555e-04
Loss = 5.4650e-03, PNorm = 166.6750, GNorm = 0.2376, lr_0 = 3.0535e-04
Loss = 5.4385e-03, PNorm = 166.6880, GNorm = 0.1133, lr_0 = 3.0514e-04
Loss = 5.2016e-03, PNorm = 166.6979, GNorm = 0.0889, lr_0 = 3.0493e-04
Loss = 4.5108e-03, PNorm = 166.7043, GNorm = 0.3014, lr_0 = 3.0472e-04
Loss = 4.9655e-03, PNorm = 166.7112, GNorm = 0.1970, lr_0 = 3.0451e-04
Loss = 5.7048e-03, PNorm = 166.7188, GNorm = 0.1967, lr_0 = 3.0430e-04
Loss = 6.1017e-03, PNorm = 166.7298, GNorm = 0.2637, lr_0 = 3.0409e-04
Loss = 6.3154e-03, PNorm = 166.7434, GNorm = 0.4070, lr_0 = 3.0388e-04
Loss = 4.7205e-03, PNorm = 166.7587, GNorm = 0.2029, lr_0 = 3.0368e-04
Loss = 4.8910e-03, PNorm = 166.7677, GNorm = 0.3514, lr_0 = 3.0347e-04
Loss = 5.2213e-03, PNorm = 166.7790, GNorm = 0.2638, lr_0 = 3.0326e-04
Loss = 5.6513e-03, PNorm = 166.7899, GNorm = 0.2246, lr_0 = 3.0305e-04
Loss = 4.5945e-03, PNorm = 166.8024, GNorm = 0.1658, lr_0 = 3.0284e-04
Loss = 6.1935e-03, PNorm = 166.8105, GNorm = 0.2630, lr_0 = 3.0264e-04
Loss = 4.3911e-03, PNorm = 166.8201, GNorm = 0.2440, lr_0 = 3.0243e-04
Loss = 4.2940e-03, PNorm = 166.8290, GNorm = 0.1133, lr_0 = 3.0222e-04
Loss = 6.2503e-03, PNorm = 166.8370, GNorm = 0.1729, lr_0 = 3.0202e-04
Loss = 4.2838e-03, PNorm = 166.8443, GNorm = 0.3426, lr_0 = 3.0181e-04
Loss = 4.7884e-03, PNorm = 166.8537, GNorm = 0.2966, lr_0 = 3.0160e-04
Loss = 6.1133e-03, PNorm = 166.8648, GNorm = 0.2014, lr_0 = 3.0140e-04
Loss = 4.7925e-03, PNorm = 166.8754, GNorm = 0.2510, lr_0 = 3.0119e-04
Loss = 4.8711e-03, PNorm = 166.8900, GNorm = 0.3373, lr_0 = 3.0098e-04
Loss = 4.4288e-03, PNorm = 166.8995, GNorm = 0.3542, lr_0 = 3.0078e-04
Loss = 5.9730e-03, PNorm = 166.9111, GNorm = 0.1026, lr_0 = 3.0057e-04
Loss = 6.4686e-03, PNorm = 166.9195, GNorm = 0.1930, lr_0 = 3.0036e-04
Loss = 4.7655e-03, PNorm = 166.9274, GNorm = 0.0630, lr_0 = 3.0016e-04
Loss = 3.9383e-03, PNorm = 166.9347, GNorm = 0.0877, lr_0 = 2.9995e-04
Loss = 6.5770e-03, PNorm = 166.9449, GNorm = 0.1839, lr_0 = 2.9975e-04
Loss = 4.5316e-03, PNorm = 166.9567, GNorm = 0.1744, lr_0 = 2.9954e-04
Loss = 4.0861e-03, PNorm = 166.9705, GNorm = 0.1625, lr_0 = 2.9934e-04
Loss = 7.2121e-03, PNorm = 166.9830, GNorm = 0.6788, lr_0 = 2.9913e-04
Loss = 5.7384e-03, PNorm = 166.9940, GNorm = 0.1393, lr_0 = 2.9893e-04
Loss = 4.1149e-03, PNorm = 167.0041, GNorm = 0.0733, lr_0 = 2.9872e-04
Loss = 5.0593e-03, PNorm = 167.0138, GNorm = 0.2135, lr_0 = 2.9852e-04
Loss = 4.0123e-03, PNorm = 167.0217, GNorm = 0.2514, lr_0 = 2.9831e-04
Loss = 6.0904e-03, PNorm = 167.0319, GNorm = 0.2657, lr_0 = 2.9811e-04
Loss = 6.9368e-03, PNorm = 167.0443, GNorm = 0.2503, lr_0 = 2.9790e-04
Loss = 5.5899e-03, PNorm = 167.0578, GNorm = 0.1279, lr_0 = 2.9770e-04
Loss = 4.9509e-03, PNorm = 167.0711, GNorm = 0.1237, lr_0 = 2.9750e-04
Loss = 5.0842e-03, PNorm = 167.0837, GNorm = 0.1818, lr_0 = 2.9729e-04
Loss = 4.6104e-03, PNorm = 167.0950, GNorm = 0.1013, lr_0 = 2.9709e-04
Loss = 5.0541e-03, PNorm = 167.1056, GNorm = 0.1039, lr_0 = 2.9689e-04
Loss = 6.6636e-03, PNorm = 167.1158, GNorm = 0.5212, lr_0 = 2.9668e-04
Loss = 5.6389e-03, PNorm = 167.1287, GNorm = 0.1917, lr_0 = 2.9648e-04
Loss = 5.5611e-03, PNorm = 167.1421, GNorm = 0.1871, lr_0 = 2.9628e-04
Loss = 4.3464e-03, PNorm = 167.1509, GNorm = 0.1408, lr_0 = 2.9607e-04
Loss = 6.0135e-03, PNorm = 167.1588, GNorm = 0.1828, lr_0 = 2.9587e-04
Loss = 5.0769e-03, PNorm = 167.1682, GNorm = 0.2142, lr_0 = 2.9567e-04
Loss = 8.7195e-03, PNorm = 167.1829, GNorm = 0.2792, lr_0 = 2.9546e-04
Loss = 4.2928e-03, PNorm = 167.1969, GNorm = 0.1395, lr_0 = 2.9526e-04
Loss = 4.9808e-03, PNorm = 167.2082, GNorm = 0.2887, lr_0 = 2.9506e-04
Loss = 4.4187e-03, PNorm = 167.2188, GNorm = 0.2441, lr_0 = 2.9486e-04
Loss = 4.4481e-03, PNorm = 167.2276, GNorm = 0.1569, lr_0 = 2.9466e-04
Loss = 6.0191e-03, PNorm = 167.2365, GNorm = 0.4156, lr_0 = 2.9445e-04
Loss = 5.8660e-03, PNorm = 167.2465, GNorm = 0.3966, lr_0 = 2.9425e-04
Loss = 4.5675e-03, PNorm = 167.2565, GNorm = 0.1929, lr_0 = 2.9405e-04
Loss = 5.2888e-03, PNorm = 167.2684, GNorm = 0.1735, lr_0 = 2.9385e-04
Loss = 8.9373e-03, PNorm = 167.2830, GNorm = 0.1126, lr_0 = 2.9365e-04
Loss = 5.5008e-03, PNorm = 167.2981, GNorm = 0.2855, lr_0 = 2.9345e-04
Loss = 7.3734e-03, PNorm = 167.3120, GNorm = 0.4756, lr_0 = 2.9325e-04
Loss = 4.3444e-03, PNorm = 167.3238, GNorm = 0.3515, lr_0 = 2.9305e-04
Loss = 5.6145e-03, PNorm = 167.3374, GNorm = 0.2825, lr_0 = 2.9284e-04
Loss = 4.2892e-03, PNorm = 167.3481, GNorm = 0.0607, lr_0 = 2.9264e-04
Loss = 8.5894e-03, PNorm = 167.3588, GNorm = 0.2472, lr_0 = 2.9244e-04
Loss = 5.7383e-03, PNorm = 167.3726, GNorm = 0.1932, lr_0 = 2.9224e-04
Loss = 6.7681e-03, PNorm = 167.3841, GNorm = 0.1412, lr_0 = 2.9204e-04
Loss = 5.2331e-03, PNorm = 167.3963, GNorm = 0.2245, lr_0 = 2.9184e-04
Loss = 4.9835e-03, PNorm = 167.4076, GNorm = 0.2332, lr_0 = 2.9164e-04
Loss = 5.7390e-03, PNorm = 167.4176, GNorm = 0.2211, lr_0 = 2.9144e-04
Loss = 5.3862e-03, PNorm = 167.4287, GNorm = 0.0997, lr_0 = 2.9124e-04
Validation mae = 0.278819
Epoch 17
Loss = 3.6208e-03, PNorm = 167.4358, GNorm = 0.1051, lr_0 = 2.9104e-04
Loss = 5.8653e-03, PNorm = 167.4406, GNorm = 0.4515, lr_0 = 2.9084e-04
Loss = 4.7927e-03, PNorm = 167.4469, GNorm = 0.0972, lr_0 = 2.9065e-04
Loss = 4.8738e-03, PNorm = 167.4552, GNorm = 0.2487, lr_0 = 2.9045e-04
Loss = 3.8493e-03, PNorm = 167.4652, GNorm = 0.0848, lr_0 = 2.9025e-04
Loss = 4.4044e-03, PNorm = 167.4733, GNorm = 0.1044, lr_0 = 2.9005e-04
Loss = 5.8463e-03, PNorm = 167.4829, GNorm = 0.7234, lr_0 = 2.8985e-04
Loss = 3.8248e-03, PNorm = 167.4920, GNorm = 0.2098, lr_0 = 2.8965e-04
Loss = 5.1584e-03, PNorm = 167.4978, GNorm = 0.2566, lr_0 = 2.8945e-04
Loss = 5.2530e-03, PNorm = 167.5072, GNorm = 0.1442, lr_0 = 2.8925e-04
Loss = 4.9820e-03, PNorm = 167.5169, GNorm = 0.1885, lr_0 = 2.8906e-04
Loss = 3.8438e-03, PNorm = 167.5240, GNorm = 0.2738, lr_0 = 2.8886e-04
Loss = 4.8299e-03, PNorm = 167.5325, GNorm = 0.1296, lr_0 = 2.8866e-04
Loss = 4.6596e-03, PNorm = 167.5408, GNorm = 0.2380, lr_0 = 2.8846e-04
Loss = 4.7890e-03, PNorm = 167.5509, GNorm = 0.1283, lr_0 = 2.8826e-04
Loss = 4.0929e-03, PNorm = 167.5584, GNorm = 0.2462, lr_0 = 2.8807e-04
Loss = 3.6872e-03, PNorm = 167.5657, GNorm = 0.3083, lr_0 = 2.8787e-04
Loss = 3.5939e-03, PNorm = 167.5768, GNorm = 0.1252, lr_0 = 2.8767e-04
Loss = 3.9306e-03, PNorm = 167.5857, GNorm = 0.2385, lr_0 = 2.8748e-04
Loss = 4.8409e-03, PNorm = 167.5951, GNorm = 0.4301, lr_0 = 2.8728e-04
Loss = 4.5588e-03, PNorm = 167.6056, GNorm = 0.6891, lr_0 = 2.8708e-04
Loss = 5.8534e-03, PNorm = 167.6093, GNorm = 0.0795, lr_0 = 2.8689e-04
Loss = 4.2173e-03, PNorm = 167.6139, GNorm = 0.2059, lr_0 = 2.8669e-04
Loss = 5.0745e-03, PNorm = 167.6197, GNorm = 0.3959, lr_0 = 2.8649e-04
Loss = 4.0865e-03, PNorm = 167.6276, GNorm = 0.2751, lr_0 = 2.8630e-04
Loss = 3.8992e-03, PNorm = 167.6352, GNorm = 0.2150, lr_0 = 2.8610e-04
Loss = 4.0114e-03, PNorm = 167.6454, GNorm = 0.0682, lr_0 = 2.8590e-04
Loss = 4.9965e-03, PNorm = 167.6545, GNorm = 0.2123, lr_0 = 2.8571e-04
Loss = 5.6084e-03, PNorm = 167.6651, GNorm = 0.2089, lr_0 = 2.8551e-04
Loss = 3.9999e-03, PNorm = 167.6757, GNorm = 0.0945, lr_0 = 2.8532e-04
Loss = 3.8982e-03, PNorm = 167.6841, GNorm = 0.1524, lr_0 = 2.8512e-04
Loss = 4.2713e-03, PNorm = 167.6927, GNorm = 0.1320, lr_0 = 2.8493e-04
Loss = 4.0516e-03, PNorm = 167.6990, GNorm = 0.1393, lr_0 = 2.8473e-04
Loss = 3.7313e-03, PNorm = 167.7062, GNorm = 0.1111, lr_0 = 2.8454e-04
Loss = 3.7083e-03, PNorm = 167.7139, GNorm = 0.2569, lr_0 = 2.8434e-04
Loss = 3.5320e-03, PNorm = 167.7231, GNorm = 0.0827, lr_0 = 2.8415e-04
Loss = 8.9193e-03, PNorm = 167.7346, GNorm = 0.2306, lr_0 = 2.8395e-04
Loss = 3.7493e-03, PNorm = 167.7449, GNorm = 0.1789, lr_0 = 2.8376e-04
Loss = 3.8619e-03, PNorm = 167.7534, GNorm = 0.1871, lr_0 = 2.8356e-04
Loss = 3.6241e-03, PNorm = 167.7644, GNorm = 0.1045, lr_0 = 2.8337e-04
Loss = 3.8389e-03, PNorm = 167.7743, GNorm = 0.3261, lr_0 = 2.8317e-04
Loss = 4.1938e-03, PNorm = 167.7806, GNorm = 0.0624, lr_0 = 2.8298e-04
Loss = 4.5938e-03, PNorm = 167.7887, GNorm = 0.1652, lr_0 = 2.8279e-04
Loss = 5.3889e-03, PNorm = 167.7955, GNorm = 0.1108, lr_0 = 2.8259e-04
Loss = 6.0604e-03, PNorm = 167.8018, GNorm = 0.3766, lr_0 = 2.8240e-04
Loss = 4.8604e-03, PNorm = 167.8081, GNorm = 0.2779, lr_0 = 2.8221e-04
Loss = 3.7206e-03, PNorm = 167.8157, GNorm = 0.1362, lr_0 = 2.8201e-04
Loss = 4.0208e-03, PNorm = 167.8243, GNorm = 0.1428, lr_0 = 2.8182e-04
Loss = 4.3713e-03, PNorm = 167.8331, GNorm = 0.4202, lr_0 = 2.8163e-04
Loss = 6.2939e-03, PNorm = 167.8402, GNorm = 0.1798, lr_0 = 2.8143e-04
Loss = 5.1276e-03, PNorm = 167.8494, GNorm = 0.0715, lr_0 = 2.8124e-04
Loss = 4.1643e-03, PNorm = 167.8569, GNorm = 0.1182, lr_0 = 2.8105e-04
Loss = 3.5846e-03, PNorm = 167.8648, GNorm = 0.2213, lr_0 = 2.8085e-04
Loss = 3.4486e-03, PNorm = 167.8724, GNorm = 0.2147, lr_0 = 2.8066e-04
Loss = 6.1467e-03, PNorm = 167.8791, GNorm = 0.6323, lr_0 = 2.8047e-04
Loss = 4.4387e-03, PNorm = 167.8877, GNorm = 0.1131, lr_0 = 2.8028e-04
Loss = 4.3877e-03, PNorm = 167.8973, GNorm = 0.0641, lr_0 = 2.8009e-04
Loss = 4.0387e-03, PNorm = 167.9066, GNorm = 0.1619, lr_0 = 2.7989e-04
Loss = 4.7287e-03, PNorm = 167.9159, GNorm = 0.1574, lr_0 = 2.7970e-04
Loss = 4.8128e-03, PNorm = 167.9246, GNorm = 0.2251, lr_0 = 2.7951e-04
Loss = 4.3552e-03, PNorm = 167.9350, GNorm = 0.3785, lr_0 = 2.7932e-04
Loss = 5.4128e-03, PNorm = 167.9463, GNorm = 0.1938, lr_0 = 2.7913e-04
Loss = 4.9597e-03, PNorm = 167.9575, GNorm = 0.2610, lr_0 = 2.7894e-04
Loss = 5.3984e-03, PNorm = 167.9647, GNorm = 0.1646, lr_0 = 2.7875e-04
Loss = 4.8377e-03, PNorm = 167.9722, GNorm = 0.1691, lr_0 = 2.7855e-04
Loss = 4.2728e-03, PNorm = 167.9834, GNorm = 0.1641, lr_0 = 2.7836e-04
Loss = 4.0217e-03, PNorm = 167.9943, GNorm = 0.2503, lr_0 = 2.7817e-04
Loss = 3.2167e-03, PNorm = 168.0047, GNorm = 0.0594, lr_0 = 2.7798e-04
Loss = 5.3917e-03, PNorm = 168.0158, GNorm = 0.1318, lr_0 = 2.7779e-04
Loss = 9.1178e-03, PNorm = 168.0229, GNorm = 0.2961, lr_0 = 2.7760e-04
Loss = 4.2882e-03, PNorm = 168.0334, GNorm = 0.2114, lr_0 = 2.7741e-04
Loss = 4.2453e-03, PNorm = 168.0431, GNorm = 0.1427, lr_0 = 2.7722e-04
Loss = 3.9112e-03, PNorm = 168.0538, GNorm = 0.1515, lr_0 = 2.7703e-04
Loss = 4.7415e-03, PNorm = 168.0650, GNorm = 0.1930, lr_0 = 2.7684e-04
Loss = 4.7121e-03, PNorm = 168.0762, GNorm = 0.0996, lr_0 = 2.7665e-04
Loss = 3.6762e-03, PNorm = 168.0864, GNorm = 0.2637, lr_0 = 2.7646e-04
Loss = 4.0425e-03, PNorm = 168.0948, GNorm = 0.1217, lr_0 = 2.7627e-04
Loss = 3.9802e-03, PNorm = 168.1029, GNorm = 0.4087, lr_0 = 2.7608e-04
Loss = 4.2202e-03, PNorm = 168.1127, GNorm = 0.1261, lr_0 = 2.7590e-04
Loss = 5.5316e-03, PNorm = 168.1224, GNorm = 0.2217, lr_0 = 2.7571e-04
Loss = 3.8507e-03, PNorm = 168.1315, GNorm = 0.1674, lr_0 = 2.7552e-04
Loss = 6.4875e-03, PNorm = 168.1419, GNorm = 0.2413, lr_0 = 2.7533e-04
Loss = 4.2379e-03, PNorm = 168.1545, GNorm = 0.2304, lr_0 = 2.7514e-04
Loss = 3.9673e-03, PNorm = 168.1655, GNorm = 0.2908, lr_0 = 2.7495e-04
Loss = 3.1990e-03, PNorm = 168.1739, GNorm = 0.1324, lr_0 = 2.7476e-04
Loss = 3.9921e-03, PNorm = 168.1805, GNorm = 0.2106, lr_0 = 2.7457e-04
Loss = 4.4112e-03, PNorm = 168.1904, GNorm = 0.1503, lr_0 = 2.7439e-04
Loss = 5.5564e-03, PNorm = 168.1995, GNorm = 0.1965, lr_0 = 2.7420e-04
Loss = 4.5708e-03, PNorm = 168.2089, GNorm = 0.3132, lr_0 = 2.7401e-04
Loss = 3.8380e-03, PNorm = 168.2145, GNorm = 0.3172, lr_0 = 2.7382e-04
Loss = 4.7222e-03, PNorm = 168.2229, GNorm = 0.2548, lr_0 = 2.7364e-04
Loss = 3.7697e-03, PNorm = 168.2319, GNorm = 0.3839, lr_0 = 2.7345e-04
Loss = 4.2662e-03, PNorm = 168.2391, GNorm = 0.1490, lr_0 = 2.7326e-04
Loss = 4.3202e-03, PNorm = 168.2482, GNorm = 0.0796, lr_0 = 2.7307e-04
Loss = 3.5354e-03, PNorm = 168.2577, GNorm = 0.4357, lr_0 = 2.7289e-04
Loss = 4.1337e-03, PNorm = 168.2653, GNorm = 0.1106, lr_0 = 2.7270e-04
Loss = 7.0394e-03, PNorm = 168.2726, GNorm = 0.1569, lr_0 = 2.7251e-04
Loss = 5.6484e-03, PNorm = 168.2822, GNorm = 0.4859, lr_0 = 2.7233e-04
Loss = 4.2889e-03, PNorm = 168.2914, GNorm = 0.0870, lr_0 = 2.7214e-04
Loss = 5.7035e-03, PNorm = 168.3002, GNorm = 0.3317, lr_0 = 2.7195e-04
Loss = 3.8018e-03, PNorm = 168.3081, GNorm = 0.2079, lr_0 = 2.7177e-04
Loss = 4.8373e-03, PNorm = 168.3180, GNorm = 0.3578, lr_0 = 2.7158e-04
Loss = 4.9703e-03, PNorm = 168.3295, GNorm = 0.0828, lr_0 = 2.7139e-04
Loss = 4.8199e-03, PNorm = 168.3421, GNorm = 0.1600, lr_0 = 2.7121e-04
Loss = 4.3986e-03, PNorm = 168.3532, GNorm = 0.2420, lr_0 = 2.7102e-04
Loss = 3.7690e-03, PNorm = 168.3624, GNorm = 0.1108, lr_0 = 2.7084e-04
Loss = 6.0983e-03, PNorm = 168.3706, GNorm = 0.2452, lr_0 = 2.7065e-04
Loss = 4.0830e-03, PNorm = 168.3786, GNorm = 0.0634, lr_0 = 2.7047e-04
Loss = 9.8117e-03, PNorm = 168.3867, GNorm = 0.1104, lr_0 = 2.7028e-04
Loss = 4.1492e-03, PNorm = 168.3950, GNorm = 0.0831, lr_0 = 2.7010e-04
Loss = 3.7739e-03, PNorm = 168.4039, GNorm = 0.2069, lr_0 = 2.6991e-04
Loss = 5.1417e-03, PNorm = 168.4142, GNorm = 0.1904, lr_0 = 2.6973e-04
Loss = 5.4130e-03, PNorm = 168.4219, GNorm = 0.4954, lr_0 = 2.6954e-04
Loss = 5.4656e-03, PNorm = 168.4300, GNorm = 0.4555, lr_0 = 2.6936e-04
Loss = 3.7208e-03, PNorm = 168.4378, GNorm = 0.3562, lr_0 = 2.6917e-04
Loss = 5.8790e-03, PNorm = 168.4496, GNorm = 0.5927, lr_0 = 2.6899e-04
Loss = 4.4693e-03, PNorm = 168.4582, GNorm = 0.1479, lr_0 = 2.6880e-04
Loss = 7.8892e-03, PNorm = 168.4671, GNorm = 0.1108, lr_0 = 2.6862e-04
Loss = 4.1612e-03, PNorm = 168.4776, GNorm = 0.3656, lr_0 = 2.6844e-04
Loss = 3.8952e-03, PNorm = 168.4865, GNorm = 0.1500, lr_0 = 2.6825e-04
Validation mae = 0.279047
Epoch 18
Loss = 4.1354e-03, PNorm = 168.4969, GNorm = 0.1484, lr_0 = 2.6807e-04
Loss = 3.8493e-03, PNorm = 168.5025, GNorm = 0.5043, lr_0 = 2.6788e-04
Loss = 3.6597e-03, PNorm = 168.5094, GNorm = 0.1838, lr_0 = 2.6770e-04
Loss = 3.3998e-03, PNorm = 168.5169, GNorm = 0.1268, lr_0 = 2.6752e-04
Loss = 3.8777e-03, PNorm = 168.5252, GNorm = 0.2051, lr_0 = 2.6733e-04
Loss = 4.8756e-03, PNorm = 168.5305, GNorm = 0.2252, lr_0 = 2.6715e-04
Loss = 3.7756e-03, PNorm = 168.5374, GNorm = 0.2160, lr_0 = 2.6697e-04
Loss = 5.1074e-03, PNorm = 168.5430, GNorm = 0.0989, lr_0 = 2.6678e-04
Loss = 3.6780e-03, PNorm = 168.5474, GNorm = 0.0640, lr_0 = 2.6660e-04
Loss = 3.3451e-03, PNorm = 168.5522, GNorm = 0.1157, lr_0 = 2.6642e-04
Loss = 4.8097e-03, PNorm = 168.5590, GNorm = 0.2658, lr_0 = 2.6624e-04
Loss = 3.9031e-03, PNorm = 168.5646, GNorm = 0.1696, lr_0 = 2.6605e-04
Loss = 3.5443e-03, PNorm = 168.5711, GNorm = 0.2083, lr_0 = 2.6587e-04
Loss = 3.7303e-03, PNorm = 168.5804, GNorm = 0.1604, lr_0 = 2.6569e-04
Loss = 3.5632e-03, PNorm = 168.5859, GNorm = 0.0914, lr_0 = 2.6551e-04
Loss = 6.8949e-03, PNorm = 168.5939, GNorm = 0.5241, lr_0 = 2.6533e-04
Loss = 5.1483e-03, PNorm = 168.6011, GNorm = 0.3635, lr_0 = 2.6514e-04
Loss = 4.0872e-03, PNorm = 168.6096, GNorm = 0.3689, lr_0 = 2.6496e-04
Loss = 4.6336e-03, PNorm = 168.6139, GNorm = 0.1128, lr_0 = 2.6478e-04
Loss = 5.2054e-03, PNorm = 168.6202, GNorm = 0.4230, lr_0 = 2.6460e-04
Loss = 5.1996e-03, PNorm = 168.6258, GNorm = 0.2542, lr_0 = 2.6442e-04
Loss = 3.4011e-03, PNorm = 168.6337, GNorm = 0.2102, lr_0 = 2.6424e-04
Loss = 3.6090e-03, PNorm = 168.6444, GNorm = 0.1072, lr_0 = 2.6406e-04
Loss = 3.5064e-03, PNorm = 168.6523, GNorm = 0.1316, lr_0 = 2.6388e-04
Loss = 5.5572e-03, PNorm = 168.6611, GNorm = 0.1088, lr_0 = 2.6369e-04
Loss = 4.4718e-03, PNorm = 168.6678, GNorm = 0.3724, lr_0 = 2.6351e-04
Loss = 3.1549e-03, PNorm = 168.6763, GNorm = 0.2571, lr_0 = 2.6333e-04
Loss = 3.7403e-03, PNorm = 168.6861, GNorm = 0.1724, lr_0 = 2.6315e-04
Loss = 3.3315e-03, PNorm = 168.6957, GNorm = 0.2255, lr_0 = 2.6297e-04
Loss = 4.4451e-03, PNorm = 168.7018, GNorm = 0.3308, lr_0 = 2.6279e-04
Loss = 3.1828e-03, PNorm = 168.7074, GNorm = 0.2844, lr_0 = 2.6261e-04
Loss = 4.3537e-03, PNorm = 168.7152, GNorm = 0.4677, lr_0 = 2.6243e-04
Loss = 3.8481e-03, PNorm = 168.7226, GNorm = 0.1819, lr_0 = 2.6225e-04
Loss = 4.2757e-03, PNorm = 168.7290, GNorm = 0.2068, lr_0 = 2.6207e-04
Loss = 4.0921e-03, PNorm = 168.7375, GNorm = 0.2802, lr_0 = 2.6189e-04
Loss = 3.5611e-03, PNorm = 168.7462, GNorm = 0.3057, lr_0 = 2.6171e-04
Loss = 3.2469e-03, PNorm = 168.7530, GNorm = 0.3574, lr_0 = 2.6153e-04
Loss = 3.5750e-03, PNorm = 168.7610, GNorm = 0.4359, lr_0 = 2.6136e-04
Loss = 4.4355e-03, PNorm = 168.7685, GNorm = 0.3280, lr_0 = 2.6118e-04
Loss = 2.8609e-03, PNorm = 168.7762, GNorm = 0.1316, lr_0 = 2.6100e-04
Loss = 4.0275e-03, PNorm = 168.7827, GNorm = 0.1839, lr_0 = 2.6082e-04
Loss = 3.6649e-03, PNorm = 168.7898, GNorm = 0.2387, lr_0 = 2.6064e-04
Loss = 3.7068e-03, PNorm = 168.7969, GNorm = 0.1412, lr_0 = 2.6046e-04
Loss = 3.6114e-03, PNorm = 168.8037, GNorm = 0.1717, lr_0 = 2.6028e-04
Loss = 4.2571e-03, PNorm = 168.8114, GNorm = 0.1431, lr_0 = 2.6011e-04
Loss = 4.2149e-03, PNorm = 168.8201, GNorm = 0.1682, lr_0 = 2.5993e-04
Loss = 3.8897e-03, PNorm = 168.8268, GNorm = 0.1528, lr_0 = 2.5975e-04
Loss = 4.3671e-03, PNorm = 168.8313, GNorm = 0.2228, lr_0 = 2.5957e-04
Loss = 4.3158e-03, PNorm = 168.8403, GNorm = 0.1610, lr_0 = 2.5939e-04
Loss = 3.5667e-03, PNorm = 168.8479, GNorm = 0.1152, lr_0 = 2.5922e-04
Loss = 4.2676e-03, PNorm = 168.8550, GNorm = 0.0667, lr_0 = 2.5904e-04
Loss = 3.0059e-03, PNorm = 168.8634, GNorm = 0.1404, lr_0 = 2.5886e-04
Loss = 4.4469e-03, PNorm = 168.8720, GNorm = 0.3622, lr_0 = 2.5868e-04
Loss = 5.1369e-03, PNorm = 168.8802, GNorm = 0.2678, lr_0 = 2.5851e-04
Loss = 4.3982e-03, PNorm = 168.8889, GNorm = 0.0870, lr_0 = 2.5833e-04
Loss = 3.6334e-03, PNorm = 168.8970, GNorm = 0.2119, lr_0 = 2.5815e-04
Loss = 6.3146e-03, PNorm = 168.9030, GNorm = 0.3309, lr_0 = 2.5797e-04
Loss = 3.5328e-03, PNorm = 168.9070, GNorm = 0.2041, lr_0 = 2.5780e-04
Loss = 3.1680e-03, PNorm = 168.9132, GNorm = 0.0827, lr_0 = 2.5762e-04
Loss = 4.2788e-03, PNorm = 168.9213, GNorm = 0.2719, lr_0 = 2.5745e-04
Loss = 3.4051e-03, PNorm = 168.9317, GNorm = 0.0543, lr_0 = 2.5727e-04
Loss = 6.5373e-03, PNorm = 168.9440, GNorm = 0.1404, lr_0 = 2.5709e-04
Loss = 3.5441e-03, PNorm = 168.9504, GNorm = 0.1054, lr_0 = 2.5692e-04
Loss = 3.3127e-03, PNorm = 168.9587, GNorm = 0.0561, lr_0 = 2.5674e-04
Loss = 3.6924e-03, PNorm = 168.9660, GNorm = 0.2175, lr_0 = 2.5656e-04
Loss = 4.1610e-03, PNorm = 168.9759, GNorm = 0.4269, lr_0 = 2.5639e-04
Loss = 4.7319e-03, PNorm = 168.9850, GNorm = 0.4474, lr_0 = 2.5621e-04
Loss = 2.8653e-03, PNorm = 168.9928, GNorm = 0.1202, lr_0 = 2.5604e-04
Loss = 4.1836e-03, PNorm = 168.9981, GNorm = 0.3345, lr_0 = 2.5586e-04
Loss = 4.8908e-03, PNorm = 169.0040, GNorm = 0.3966, lr_0 = 2.5569e-04
Loss = 4.5074e-03, PNorm = 169.0138, GNorm = 0.1879, lr_0 = 2.5551e-04
Loss = 5.1568e-03, PNorm = 169.0214, GNorm = 0.2201, lr_0 = 2.5534e-04
Loss = 4.1198e-03, PNorm = 169.0297, GNorm = 0.3069, lr_0 = 2.5516e-04
Loss = 3.2008e-03, PNorm = 169.0384, GNorm = 0.2314, lr_0 = 2.5499e-04
Loss = 4.2958e-03, PNorm = 169.0451, GNorm = 0.1535, lr_0 = 2.5481e-04
Loss = 3.1375e-03, PNorm = 169.0539, GNorm = 0.1406, lr_0 = 2.5464e-04
Loss = 6.7196e-03, PNorm = 169.0633, GNorm = 0.1086, lr_0 = 2.5446e-04
Loss = 4.0353e-03, PNorm = 169.0725, GNorm = 0.2135, lr_0 = 2.5429e-04
Loss = 8.5741e-03, PNorm = 169.0811, GNorm = 0.0575, lr_0 = 2.5411e-04
Loss = 3.1875e-03, PNorm = 169.0893, GNorm = 0.1475, lr_0 = 2.5394e-04
Loss = 3.2407e-03, PNorm = 169.0990, GNorm = 0.1433, lr_0 = 2.5377e-04
Loss = 4.8209e-03, PNorm = 169.1066, GNorm = 0.4882, lr_0 = 2.5359e-04
Loss = 3.3137e-03, PNorm = 169.1116, GNorm = 0.1899, lr_0 = 2.5342e-04
Loss = 2.7613e-03, PNorm = 169.1174, GNorm = 0.1660, lr_0 = 2.5325e-04
Loss = 3.3518e-03, PNorm = 169.1233, GNorm = 0.1330, lr_0 = 2.5307e-04
Loss = 3.6340e-03, PNorm = 169.1303, GNorm = 0.0810, lr_0 = 2.5290e-04
Loss = 4.1190e-03, PNorm = 169.1392, GNorm = 0.0721, lr_0 = 2.5273e-04
Loss = 3.4546e-03, PNorm = 169.1473, GNorm = 0.1007, lr_0 = 2.5255e-04
Loss = 5.3939e-03, PNorm = 169.1534, GNorm = 0.1927, lr_0 = 2.5238e-04
Loss = 4.3970e-03, PNorm = 169.1606, GNorm = 0.1064, lr_0 = 2.5221e-04
Loss = 2.7269e-03, PNorm = 169.1672, GNorm = 0.1736, lr_0 = 2.5203e-04
Loss = 3.2807e-03, PNorm = 169.1733, GNorm = 0.2153, lr_0 = 2.5186e-04
Loss = 3.1136e-03, PNorm = 169.1789, GNorm = 0.0917, lr_0 = 2.5169e-04
Loss = 4.6765e-03, PNorm = 169.1873, GNorm = 0.0770, lr_0 = 2.5152e-04
Loss = 3.2428e-03, PNorm = 169.1954, GNorm = 0.0713, lr_0 = 2.5134e-04
Loss = 3.0405e-03, PNorm = 169.2021, GNorm = 0.2005, lr_0 = 2.5117e-04
Loss = 3.7606e-03, PNorm = 169.2080, GNorm = 0.3650, lr_0 = 2.5100e-04
Loss = 4.1028e-03, PNorm = 169.2151, GNorm = 0.2401, lr_0 = 2.5083e-04
Loss = 2.9313e-03, PNorm = 169.2219, GNorm = 0.2783, lr_0 = 2.5066e-04
Loss = 3.1374e-03, PNorm = 169.2325, GNorm = 0.1404, lr_0 = 2.5048e-04
Loss = 3.9965e-03, PNorm = 169.2437, GNorm = 0.1404, lr_0 = 2.5031e-04
Loss = 3.4105e-03, PNorm = 169.2521, GNorm = 0.0573, lr_0 = 2.5014e-04
Loss = 3.9909e-03, PNorm = 169.2592, GNorm = 0.2789, lr_0 = 2.4997e-04
Loss = 3.1696e-03, PNorm = 169.2669, GNorm = 0.0926, lr_0 = 2.4980e-04
Loss = 3.4843e-03, PNorm = 169.2742, GNorm = 0.0785, lr_0 = 2.4963e-04
Loss = 4.2255e-03, PNorm = 169.2827, GNorm = 0.2719, lr_0 = 2.4946e-04
Loss = 2.8779e-03, PNorm = 169.2895, GNorm = 0.2094, lr_0 = 2.4929e-04
Loss = 3.9195e-03, PNorm = 169.2973, GNorm = 0.0833, lr_0 = 2.4911e-04
Loss = 3.2899e-03, PNorm = 169.3054, GNorm = 0.3064, lr_0 = 2.4894e-04
Loss = 2.8075e-03, PNorm = 169.3132, GNorm = 0.0494, lr_0 = 2.4877e-04
Loss = 4.3932e-03, PNorm = 169.3185, GNorm = 0.3566, lr_0 = 2.4860e-04
Loss = 3.1878e-03, PNorm = 169.3275, GNorm = 0.1768, lr_0 = 2.4843e-04
Loss = 3.8763e-03, PNorm = 169.3348, GNorm = 0.3112, lr_0 = 2.4826e-04
Loss = 3.5487e-03, PNorm = 169.3415, GNorm = 0.3304, lr_0 = 2.4809e-04
Loss = 3.3361e-03, PNorm = 169.3502, GNorm = 0.1451, lr_0 = 2.4792e-04
Loss = 4.0453e-03, PNorm = 169.3552, GNorm = 0.2311, lr_0 = 2.4775e-04
Loss = 5.3311e-03, PNorm = 169.3627, GNorm = 0.2431, lr_0 = 2.4758e-04
Loss = 3.7871e-03, PNorm = 169.3717, GNorm = 0.1841, lr_0 = 2.4741e-04
Loss = 3.8850e-03, PNorm = 169.3779, GNorm = 0.2508, lr_0 = 2.4724e-04
Loss = 4.1915e-03, PNorm = 169.3877, GNorm = 0.2066, lr_0 = 2.4707e-04
Validation mae = 0.278432
Epoch 19
Loss = 3.0549e-03, PNorm = 169.3948, GNorm = 0.0683, lr_0 = 2.4690e-04
Loss = 3.0808e-03, PNorm = 169.4006, GNorm = 0.1358, lr_0 = 2.4674e-04
Loss = 2.8463e-03, PNorm = 169.4059, GNorm = 0.0856, lr_0 = 2.4657e-04
Loss = 5.6453e-03, PNorm = 169.4101, GNorm = 0.2343, lr_0 = 2.4640e-04
Loss = 2.9128e-03, PNorm = 169.4140, GNorm = 0.0663, lr_0 = 2.4623e-04
Loss = 2.7889e-03, PNorm = 169.4189, GNorm = 0.0378, lr_0 = 2.4606e-04
Loss = 3.0740e-03, PNorm = 169.4241, GNorm = 0.2203, lr_0 = 2.4589e-04
Loss = 2.9151e-03, PNorm = 169.4289, GNorm = 0.1140, lr_0 = 2.4572e-04
Loss = 3.5272e-03, PNorm = 169.4353, GNorm = 0.3413, lr_0 = 2.4556e-04
Loss = 3.5622e-03, PNorm = 169.4403, GNorm = 0.1380, lr_0 = 2.4539e-04
Loss = 3.1756e-03, PNorm = 169.4447, GNorm = 0.1319, lr_0 = 2.4522e-04
Loss = 4.7161e-03, PNorm = 169.4489, GNorm = 0.0965, lr_0 = 2.4505e-04
Loss = 2.7933e-03, PNorm = 169.4520, GNorm = 0.1510, lr_0 = 2.4488e-04
Loss = 2.7953e-03, PNorm = 169.4553, GNorm = 0.1380, lr_0 = 2.4472e-04
Loss = 3.3534e-03, PNorm = 169.4601, GNorm = 0.0857, lr_0 = 2.4455e-04
Loss = 2.3640e-03, PNorm = 169.4673, GNorm = 0.0606, lr_0 = 2.4438e-04
Loss = 3.1098e-03, PNorm = 169.4726, GNorm = 0.1653, lr_0 = 2.4421e-04
Loss = 2.6079e-03, PNorm = 169.4761, GNorm = 0.1806, lr_0 = 2.4405e-04
Loss = 3.7675e-03, PNorm = 169.4819, GNorm = 0.3520, lr_0 = 2.4388e-04
Loss = 3.3542e-03, PNorm = 169.4907, GNorm = 0.1090, lr_0 = 2.4371e-04
Loss = 3.8557e-03, PNorm = 169.4992, GNorm = 0.2915, lr_0 = 2.4354e-04
Loss = 2.6192e-03, PNorm = 169.5064, GNorm = 0.0701, lr_0 = 2.4338e-04
Loss = 3.8664e-03, PNorm = 169.5094, GNorm = 0.2040, lr_0 = 2.4321e-04
Loss = 3.2497e-03, PNorm = 169.5129, GNorm = 0.1790, lr_0 = 2.4304e-04
Loss = 3.3214e-03, PNorm = 169.5183, GNorm = 0.1807, lr_0 = 2.4288e-04
Loss = 3.4702e-03, PNorm = 169.5240, GNorm = 0.0926, lr_0 = 2.4271e-04
Loss = 2.6577e-03, PNorm = 169.5305, GNorm = 0.0715, lr_0 = 2.4254e-04
Loss = 3.1192e-03, PNorm = 169.5333, GNorm = 0.2922, lr_0 = 2.4238e-04
Loss = 3.4356e-03, PNorm = 169.5393, GNorm = 0.1722, lr_0 = 2.4221e-04
Loss = 2.9545e-03, PNorm = 169.5436, GNorm = 0.0945, lr_0 = 2.4205e-04
Loss = 3.7703e-03, PNorm = 169.5477, GNorm = 0.1620, lr_0 = 2.4188e-04
Loss = 3.5840e-03, PNorm = 169.5538, GNorm = 0.1792, lr_0 = 2.4171e-04
Loss = 2.4171e-03, PNorm = 169.5614, GNorm = 0.2247, lr_0 = 2.4155e-04
Loss = 2.6304e-03, PNorm = 169.5681, GNorm = 0.1345, lr_0 = 2.4138e-04
Loss = 4.4474e-03, PNorm = 169.5745, GNorm = 0.4525, lr_0 = 2.4122e-04
Loss = 3.3134e-03, PNorm = 169.5810, GNorm = 0.3893, lr_0 = 2.4105e-04
Loss = 4.2086e-03, PNorm = 169.5893, GNorm = 0.2035, lr_0 = 2.4089e-04
Loss = 2.8463e-03, PNorm = 169.5928, GNorm = 0.0988, lr_0 = 2.4072e-04
Loss = 3.8088e-03, PNorm = 169.5978, GNorm = 0.1030, lr_0 = 2.4056e-04
Loss = 2.8188e-03, PNorm = 169.6005, GNorm = 0.0688, lr_0 = 2.4039e-04
Loss = 4.3286e-03, PNorm = 169.6031, GNorm = 0.2802, lr_0 = 2.4023e-04
Loss = 3.0505e-03, PNorm = 169.6079, GNorm = 0.1558, lr_0 = 2.4006e-04
Loss = 3.8694e-03, PNorm = 169.6152, GNorm = 0.0629, lr_0 = 2.3990e-04
Loss = 3.1417e-03, PNorm = 169.6222, GNorm = 0.1469, lr_0 = 2.3974e-04
Loss = 2.9456e-03, PNorm = 169.6288, GNorm = 0.1319, lr_0 = 2.3957e-04
Loss = 3.7860e-03, PNorm = 169.6360, GNorm = 0.2663, lr_0 = 2.3941e-04
Loss = 3.3401e-03, PNorm = 169.6436, GNorm = 0.1505, lr_0 = 2.3924e-04
Loss = 2.8399e-03, PNorm = 169.6507, GNorm = 0.1208, lr_0 = 2.3908e-04
Loss = 2.3804e-03, PNorm = 169.6540, GNorm = 0.1298, lr_0 = 2.3892e-04
Loss = 2.4786e-03, PNorm = 169.6592, GNorm = 0.1993, lr_0 = 2.3875e-04
Loss = 3.6375e-03, PNorm = 169.6652, GNorm = 0.1099, lr_0 = 2.3859e-04
Loss = 3.1122e-03, PNorm = 169.6690, GNorm = 0.1747, lr_0 = 2.3842e-04
Loss = 2.7795e-03, PNorm = 169.6733, GNorm = 0.3260, lr_0 = 2.3826e-04
Loss = 3.8626e-03, PNorm = 169.6790, GNorm = 0.1644, lr_0 = 2.3810e-04
Loss = 5.6720e-03, PNorm = 169.6860, GNorm = 0.3036, lr_0 = 2.3794e-04
Loss = 3.7425e-03, PNorm = 169.6935, GNorm = 0.1976, lr_0 = 2.3777e-04
Loss = 2.7177e-03, PNorm = 169.7002, GNorm = 0.1942, lr_0 = 2.3761e-04
Loss = 2.3271e-03, PNorm = 169.7093, GNorm = 0.2763, lr_0 = 2.3745e-04
Loss = 3.7022e-03, PNorm = 169.7154, GNorm = 0.1720, lr_0 = 2.3728e-04
Loss = 3.5101e-03, PNorm = 169.7217, GNorm = 0.1190, lr_0 = 2.3712e-04
Loss = 3.6501e-03, PNorm = 169.7284, GNorm = 0.2082, lr_0 = 2.3696e-04
Loss = 4.1280e-03, PNorm = 169.7361, GNorm = 0.2321, lr_0 = 2.3680e-04
Loss = 4.9928e-03, PNorm = 169.7430, GNorm = 0.2352, lr_0 = 2.3663e-04
Loss = 2.4595e-03, PNorm = 169.7485, GNorm = 0.1047, lr_0 = 2.3647e-04
Loss = 2.8853e-03, PNorm = 169.7559, GNorm = 0.1191, lr_0 = 2.3631e-04
Loss = 3.5400e-03, PNorm = 169.7626, GNorm = 0.0570, lr_0 = 2.3615e-04
Loss = 2.9895e-03, PNorm = 169.7683, GNorm = 0.0647, lr_0 = 2.3599e-04
Loss = 2.7132e-03, PNorm = 169.7738, GNorm = 0.2855, lr_0 = 2.3582e-04
Loss = 5.4524e-03, PNorm = 169.7790, GNorm = 0.2589, lr_0 = 2.3566e-04
Loss = 3.5266e-03, PNorm = 169.7860, GNorm = 0.1711, lr_0 = 2.3550e-04
Loss = 3.2414e-03, PNorm = 169.7899, GNorm = 0.2012, lr_0 = 2.3534e-04
Loss = 3.4668e-03, PNorm = 169.7928, GNorm = 0.3367, lr_0 = 2.3518e-04
Loss = 3.3116e-03, PNorm = 169.7989, GNorm = 0.1771, lr_0 = 2.3502e-04
Loss = 2.5065e-03, PNorm = 169.8067, GNorm = 0.1423, lr_0 = 2.3486e-04
Loss = 4.0971e-03, PNorm = 169.8159, GNorm = 0.1868, lr_0 = 2.3470e-04
Loss = 3.0037e-03, PNorm = 169.8234, GNorm = 0.1422, lr_0 = 2.3454e-04
Loss = 3.6713e-03, PNorm = 169.8298, GNorm = 0.2425, lr_0 = 2.3437e-04
Loss = 3.3120e-03, PNorm = 169.8348, GNorm = 0.3899, lr_0 = 2.3421e-04
Loss = 3.1197e-03, PNorm = 169.8438, GNorm = 0.1684, lr_0 = 2.3405e-04
Loss = 6.5872e-03, PNorm = 169.8500, GNorm = 0.0972, lr_0 = 2.3389e-04
Loss = 3.3273e-03, PNorm = 169.8570, GNorm = 0.2303, lr_0 = 2.3373e-04
Loss = 3.5285e-03, PNorm = 169.8633, GNorm = 0.2951, lr_0 = 2.3357e-04
Loss = 2.4669e-03, PNorm = 169.8696, GNorm = 0.1247, lr_0 = 2.3341e-04
Loss = 2.4251e-03, PNorm = 169.8759, GNorm = 0.0602, lr_0 = 2.3325e-04
Loss = 3.4089e-03, PNorm = 169.8807, GNorm = 0.0813, lr_0 = 2.3309e-04
Loss = 3.8127e-03, PNorm = 169.8870, GNorm = 0.2900, lr_0 = 2.3293e-04
Loss = 2.5931e-03, PNorm = 169.8929, GNorm = 0.1760, lr_0 = 2.3277e-04
Loss = 2.6546e-03, PNorm = 169.9013, GNorm = 0.2662, lr_0 = 2.3261e-04
Loss = 2.9707e-03, PNorm = 169.9057, GNorm = 0.2098, lr_0 = 2.3246e-04
Loss = 4.6354e-03, PNorm = 169.9115, GNorm = 0.1650, lr_0 = 2.3230e-04
Loss = 3.9346e-03, PNorm = 169.9173, GNorm = 0.0704, lr_0 = 2.3214e-04
Loss = 3.1092e-03, PNorm = 169.9264, GNorm = 0.1505, lr_0 = 2.3198e-04
Loss = 4.2759e-03, PNorm = 169.9312, GNorm = 0.1866, lr_0 = 2.3182e-04
Loss = 5.6957e-03, PNorm = 169.9350, GNorm = 0.1471, lr_0 = 2.3166e-04
Loss = 7.1475e-03, PNorm = 169.9380, GNorm = 0.1838, lr_0 = 2.3150e-04
Loss = 2.6244e-03, PNorm = 169.9458, GNorm = 0.1833, lr_0 = 2.3134e-04
Loss = 3.7966e-03, PNorm = 169.9514, GNorm = 0.2335, lr_0 = 2.3118e-04
Loss = 4.0417e-03, PNorm = 169.9601, GNorm = 0.1822, lr_0 = 2.3103e-04
Loss = 2.9905e-03, PNorm = 169.9673, GNorm = 0.2314, lr_0 = 2.3087e-04
Loss = 2.5826e-03, PNorm = 169.9741, GNorm = 0.0815, lr_0 = 2.3071e-04
Loss = 4.0475e-03, PNorm = 169.9822, GNorm = 0.1588, lr_0 = 2.3055e-04
Loss = 3.5762e-03, PNorm = 169.9888, GNorm = 0.0800, lr_0 = 2.3039e-04
Loss = 2.9545e-03, PNorm = 169.9976, GNorm = 0.1089, lr_0 = 2.3024e-04
Loss = 4.1772e-03, PNorm = 170.0037, GNorm = 0.1280, lr_0 = 2.3008e-04
Loss = 3.0593e-03, PNorm = 170.0098, GNorm = 0.3891, lr_0 = 2.2992e-04
Loss = 2.3269e-03, PNorm = 170.0153, GNorm = 0.1353, lr_0 = 2.2976e-04
Loss = 3.9225e-03, PNorm = 170.0222, GNorm = 0.1012, lr_0 = 2.2961e-04
Loss = 2.9662e-03, PNorm = 170.0274, GNorm = 0.2105, lr_0 = 2.2945e-04
Loss = 2.4533e-03, PNorm = 170.0334, GNorm = 0.1391, lr_0 = 2.2929e-04
Loss = 3.1181e-03, PNorm = 170.0386, GNorm = 0.2878, lr_0 = 2.2913e-04
Loss = 3.6028e-03, PNorm = 170.0470, GNorm = 0.1805, lr_0 = 2.2898e-04
Loss = 4.4823e-03, PNorm = 170.0554, GNorm = 0.1123, lr_0 = 2.2882e-04
Loss = 3.7739e-03, PNorm = 170.0619, GNorm = 0.2893, lr_0 = 2.2866e-04
Loss = 8.6656e-03, PNorm = 170.0668, GNorm = 0.3689, lr_0 = 2.2851e-04
Loss = 3.3003e-03, PNorm = 170.0735, GNorm = 0.2799, lr_0 = 2.2835e-04
Loss = 4.8468e-03, PNorm = 170.0792, GNorm = 0.0853, lr_0 = 2.2819e-04
Loss = 4.0861e-03, PNorm = 170.0844, GNorm = 0.2125, lr_0 = 2.2804e-04
Loss = 3.3136e-03, PNorm = 170.0928, GNorm = 0.2723, lr_0 = 2.2788e-04
Loss = 3.1670e-03, PNorm = 170.1009, GNorm = 0.2527, lr_0 = 2.2773e-04
Loss = 5.7591e-03, PNorm = 170.1091, GNorm = 0.5973, lr_0 = 2.2757e-04
Validation mae = 0.278471
Epoch 20
Loss = 3.0981e-03, PNorm = 170.1153, GNorm = 0.2838, lr_0 = 2.2741e-04
Loss = 3.8339e-03, PNorm = 170.1215, GNorm = 0.1024, lr_0 = 2.2726e-04
Loss = 2.2005e-03, PNorm = 170.1280, GNorm = 0.3063, lr_0 = 2.2710e-04
Loss = 3.8596e-03, PNorm = 170.1345, GNorm = 0.1096, lr_0 = 2.2695e-04
Loss = 2.6267e-03, PNorm = 170.1399, GNorm = 0.1322, lr_0 = 2.2679e-04
Loss = 4.9849e-03, PNorm = 170.1426, GNorm = 0.2190, lr_0 = 2.2664e-04
Loss = 3.5737e-03, PNorm = 170.1454, GNorm = 0.2372, lr_0 = 2.2648e-04
Loss = 2.8844e-03, PNorm = 170.1494, GNorm = 0.2341, lr_0 = 2.2632e-04
Loss = 3.4777e-03, PNorm = 170.1571, GNorm = 0.1924, lr_0 = 2.2617e-04
Loss = 2.8980e-03, PNorm = 170.1647, GNorm = 0.2378, lr_0 = 2.2601e-04
Loss = 3.3947e-03, PNorm = 170.1728, GNorm = 0.2012, lr_0 = 2.2586e-04
Loss = 2.7415e-03, PNorm = 170.1794, GNorm = 0.0743, lr_0 = 2.2571e-04
Loss = 3.4863e-03, PNorm = 170.1855, GNorm = 0.4129, lr_0 = 2.2555e-04
Loss = 2.5219e-03, PNorm = 170.1894, GNorm = 0.1861, lr_0 = 2.2540e-04
Loss = 2.7276e-03, PNorm = 170.1927, GNorm = 0.2163, lr_0 = 2.2524e-04
Loss = 3.0677e-03, PNorm = 170.1970, GNorm = 0.1340, lr_0 = 2.2509e-04
Loss = 2.6346e-03, PNorm = 170.2035, GNorm = 0.1137, lr_0 = 2.2493e-04
Loss = 2.9886e-03, PNorm = 170.2078, GNorm = 0.0658, lr_0 = 2.2478e-04
Loss = 3.2042e-03, PNorm = 170.2134, GNorm = 0.1358, lr_0 = 2.2463e-04
Loss = 3.4343e-03, PNorm = 170.2195, GNorm = 0.2085, lr_0 = 2.2447e-04
Loss = 2.4139e-03, PNorm = 170.2245, GNorm = 0.2238, lr_0 = 2.2432e-04
Loss = 2.0780e-03, PNorm = 170.2297, GNorm = 0.2373, lr_0 = 2.2416e-04
Loss = 2.6793e-03, PNorm = 170.2349, GNorm = 0.3371, lr_0 = 2.2401e-04
Loss = 3.1725e-03, PNorm = 170.2395, GNorm = 0.0629, lr_0 = 2.2386e-04
Loss = 2.2199e-03, PNorm = 170.2449, GNorm = 0.1575, lr_0 = 2.2370e-04
Loss = 4.4634e-03, PNorm = 170.2502, GNorm = 0.0561, lr_0 = 2.2355e-04
Loss = 2.2260e-03, PNorm = 170.2542, GNorm = 0.2260, lr_0 = 2.2340e-04
Loss = 3.7715e-03, PNorm = 170.2582, GNorm = 0.0766, lr_0 = 2.2324e-04
Loss = 3.4251e-03, PNorm = 170.2604, GNorm = 0.0924, lr_0 = 2.2309e-04
Loss = 2.3824e-03, PNorm = 170.2652, GNorm = 0.0929, lr_0 = 2.2294e-04
Loss = 3.0193e-03, PNorm = 170.2699, GNorm = 0.1640, lr_0 = 2.2279e-04
Loss = 4.7602e-03, PNorm = 170.2752, GNorm = 0.0631, lr_0 = 2.2263e-04
Loss = 2.5863e-03, PNorm = 170.2804, GNorm = 0.1317, lr_0 = 2.2248e-04
Loss = 2.8008e-03, PNorm = 170.2862, GNorm = 0.1005, lr_0 = 2.2233e-04
Loss = 2.1834e-03, PNorm = 170.2913, GNorm = 0.2697, lr_0 = 2.2218e-04
Loss = 3.9948e-03, PNorm = 170.2942, GNorm = 0.0879, lr_0 = 2.2202e-04
Loss = 3.0267e-03, PNorm = 170.2982, GNorm = 0.1307, lr_0 = 2.2187e-04
Loss = 3.9469e-03, PNorm = 170.3042, GNorm = 0.3277, lr_0 = 2.2172e-04
Loss = 2.5371e-03, PNorm = 170.3098, GNorm = 0.1288, lr_0 = 2.2157e-04
Loss = 2.4228e-03, PNorm = 170.3158, GNorm = 0.1524, lr_0 = 2.2142e-04
Loss = 2.3122e-03, PNorm = 170.3227, GNorm = 0.1654, lr_0 = 2.2126e-04
Loss = 2.3735e-03, PNorm = 170.3257, GNorm = 0.1524, lr_0 = 2.2111e-04
Loss = 2.8908e-03, PNorm = 170.3299, GNorm = 0.1406, lr_0 = 2.2096e-04
Loss = 2.4146e-03, PNorm = 170.3360, GNorm = 0.0883, lr_0 = 2.2081e-04
Loss = 3.6515e-03, PNorm = 170.3446, GNorm = 0.0671, lr_0 = 2.2066e-04
Loss = 3.0263e-03, PNorm = 170.3503, GNorm = 0.2408, lr_0 = 2.2051e-04
Loss = 2.6397e-03, PNorm = 170.3578, GNorm = 0.0706, lr_0 = 2.2036e-04
Loss = 2.6435e-03, PNorm = 170.3642, GNorm = 0.0773, lr_0 = 2.2021e-04
Loss = 2.4779e-03, PNorm = 170.3687, GNorm = 0.0555, lr_0 = 2.2005e-04
Loss = 2.6225e-03, PNorm = 170.3730, GNorm = 0.0907, lr_0 = 2.1990e-04
Loss = 3.0043e-03, PNorm = 170.3774, GNorm = 0.1635, lr_0 = 2.1975e-04
Loss = 3.3076e-03, PNorm = 170.3822, GNorm = 0.1453, lr_0 = 2.1960e-04
Loss = 3.3523e-03, PNorm = 170.3880, GNorm = 0.1839, lr_0 = 2.1945e-04
Loss = 2.3783e-03, PNorm = 170.3944, GNorm = 0.1195, lr_0 = 2.1930e-04
Loss = 3.5968e-03, PNorm = 170.3999, GNorm = 0.0531, lr_0 = 2.1915e-04
Loss = 2.9598e-03, PNorm = 170.4051, GNorm = 0.2213, lr_0 = 2.1900e-04
Loss = 3.4770e-03, PNorm = 170.4108, GNorm = 0.1165, lr_0 = 2.1885e-04
Loss = 3.7217e-03, PNorm = 170.4141, GNorm = 0.0766, lr_0 = 2.1870e-04
Loss = 4.3013e-03, PNorm = 170.4195, GNorm = 0.1195, lr_0 = 2.1855e-04
Loss = 2.6469e-03, PNorm = 170.4233, GNorm = 0.0794, lr_0 = 2.1840e-04
Loss = 3.4909e-03, PNorm = 170.4271, GNorm = 0.1076, lr_0 = 2.1825e-04
Loss = 4.0493e-03, PNorm = 170.4311, GNorm = 0.2504, lr_0 = 2.1810e-04
Loss = 2.3296e-03, PNorm = 170.4357, GNorm = 0.1508, lr_0 = 2.1795e-04
Loss = 2.4649e-03, PNorm = 170.4409, GNorm = 0.0973, lr_0 = 2.1780e-04
Loss = 3.3882e-03, PNorm = 170.4456, GNorm = 0.4101, lr_0 = 2.1765e-04
Loss = 3.6414e-03, PNorm = 170.4490, GNorm = 0.1505, lr_0 = 2.1751e-04
Loss = 2.3760e-03, PNorm = 170.4552, GNorm = 0.2376, lr_0 = 2.1736e-04
Loss = 2.4897e-03, PNorm = 170.4590, GNorm = 0.0603, lr_0 = 2.1721e-04
Loss = 2.3658e-03, PNorm = 170.4656, GNorm = 0.0838, lr_0 = 2.1706e-04
Loss = 2.8642e-03, PNorm = 170.4748, GNorm = 0.1652, lr_0 = 2.1691e-04
Loss = 2.3095e-03, PNorm = 170.4818, GNorm = 0.0625, lr_0 = 2.1676e-04
Loss = 2.7488e-03, PNorm = 170.4862, GNorm = 0.4137, lr_0 = 2.1661e-04
Loss = 2.1431e-03, PNorm = 170.4883, GNorm = 0.0815, lr_0 = 2.1646e-04
Loss = 2.3184e-03, PNorm = 170.4927, GNorm = 0.0961, lr_0 = 2.1632e-04
Loss = 2.3915e-03, PNorm = 170.4984, GNorm = 0.1209, lr_0 = 2.1617e-04
Loss = 2.8046e-03, PNorm = 170.5032, GNorm = 0.0983, lr_0 = 2.1602e-04
Loss = 4.3488e-03, PNorm = 170.5081, GNorm = 0.1719, lr_0 = 2.1587e-04
Loss = 6.7404e-03, PNorm = 170.5127, GNorm = 0.1432, lr_0 = 2.1572e-04
Loss = 3.8708e-03, PNorm = 170.5170, GNorm = 0.1405, lr_0 = 2.1558e-04
Loss = 2.6382e-03, PNorm = 170.5218, GNorm = 0.1826, lr_0 = 2.1543e-04
Loss = 2.4197e-03, PNorm = 170.5285, GNorm = 0.0789, lr_0 = 2.1528e-04
Loss = 2.8069e-03, PNorm = 170.5344, GNorm = 0.0834, lr_0 = 2.1513e-04
Loss = 2.3119e-03, PNorm = 170.5397, GNorm = 0.1340, lr_0 = 2.1499e-04
Loss = 2.4464e-03, PNorm = 170.5445, GNorm = 0.1020, lr_0 = 2.1484e-04
Loss = 4.5515e-03, PNorm = 170.5483, GNorm = 0.2675, lr_0 = 2.1469e-04
Loss = 4.6671e-03, PNorm = 170.5561, GNorm = 0.1934, lr_0 = 2.1454e-04
Loss = 3.7070e-03, PNorm = 170.5644, GNorm = 0.5736, lr_0 = 2.1440e-04
Loss = 1.8037e-03, PNorm = 170.5702, GNorm = 0.1873, lr_0 = 2.1425e-04
Loss = 2.6120e-03, PNorm = 170.5740, GNorm = 0.1190, lr_0 = 2.1410e-04
Loss = 2.4479e-03, PNorm = 170.5779, GNorm = 0.0358, lr_0 = 2.1396e-04
Loss = 4.1032e-03, PNorm = 170.5846, GNorm = 0.1737, lr_0 = 2.1381e-04
Loss = 4.1740e-03, PNorm = 170.5912, GNorm = 0.3785, lr_0 = 2.1366e-04
Loss = 2.6113e-03, PNorm = 170.5964, GNorm = 0.1798, lr_0 = 2.1352e-04
Loss = 3.8142e-03, PNorm = 170.6014, GNorm = 0.0759, lr_0 = 2.1337e-04
Loss = 3.8715e-03, PNorm = 170.6059, GNorm = 0.0899, lr_0 = 2.1323e-04
Loss = 3.2587e-03, PNorm = 170.6111, GNorm = 0.1816, lr_0 = 2.1308e-04
Loss = 2.5845e-03, PNorm = 170.6185, GNorm = 0.1742, lr_0 = 2.1293e-04
Loss = 3.4651e-03, PNorm = 170.6226, GNorm = 0.0972, lr_0 = 2.1279e-04
Loss = 4.2416e-03, PNorm = 170.6288, GNorm = 0.5296, lr_0 = 2.1264e-04
Loss = 2.7486e-03, PNorm = 170.6328, GNorm = 0.1547, lr_0 = 2.1250e-04
Loss = 3.5847e-03, PNorm = 170.6381, GNorm = 0.1264, lr_0 = 2.1235e-04
Loss = 2.3242e-03, PNorm = 170.6467, GNorm = 0.1754, lr_0 = 2.1221e-04
Loss = 2.1433e-03, PNorm = 170.6544, GNorm = 0.1245, lr_0 = 2.1206e-04
Loss = 4.2935e-03, PNorm = 170.6588, GNorm = 0.0910, lr_0 = 2.1191e-04
Loss = 2.3148e-03, PNorm = 170.6653, GNorm = 0.0856, lr_0 = 2.1177e-04
Loss = 4.0144e-03, PNorm = 170.6689, GNorm = 0.3169, lr_0 = 2.1162e-04
Loss = 5.7097e-03, PNorm = 170.6761, GNorm = 0.2268, lr_0 = 2.1148e-04
Loss = 2.0919e-03, PNorm = 170.6817, GNorm = 0.0787, lr_0 = 2.1133e-04
Loss = 4.4598e-03, PNorm = 170.6876, GNorm = 0.1468, lr_0 = 2.1119e-04
Loss = 2.5897e-03, PNorm = 170.6916, GNorm = 0.1455, lr_0 = 2.1104e-04
Loss = 3.2683e-03, PNorm = 170.6973, GNorm = 0.2319, lr_0 = 2.1090e-04
Loss = 3.4443e-03, PNorm = 170.7065, GNorm = 0.0899, lr_0 = 2.1076e-04
Loss = 2.6743e-03, PNorm = 170.7141, GNorm = 0.0762, lr_0 = 2.1061e-04
Loss = 2.2972e-03, PNorm = 170.7209, GNorm = 0.0572, lr_0 = 2.1047e-04
Loss = 2.6151e-03, PNorm = 170.7270, GNorm = 0.1598, lr_0 = 2.1032e-04
Loss = 5.0230e-03, PNorm = 170.7307, GNorm = 0.3130, lr_0 = 2.1018e-04
Loss = 2.9456e-03, PNorm = 170.7362, GNorm = 0.1868, lr_0 = 2.1003e-04
Loss = 2.1041e-03, PNorm = 170.7412, GNorm = 0.1064, lr_0 = 2.0989e-04
Loss = 3.3316e-03, PNorm = 170.7464, GNorm = 0.2730, lr_0 = 2.0975e-04
Loss = 3.0061e-03, PNorm = 170.7523, GNorm = 0.1148, lr_0 = 2.0960e-04
Validation mae = 0.278395
Epoch 21
Loss = 2.3054e-03, PNorm = 170.7572, GNorm = 0.1402, lr_0 = 2.0946e-04
Loss = 3.4336e-03, PNorm = 170.7594, GNorm = 0.2351, lr_0 = 2.0932e-04
Loss = 2.1684e-03, PNorm = 170.7617, GNorm = 0.0716, lr_0 = 2.0917e-04
Loss = 2.4560e-03, PNorm = 170.7629, GNorm = 0.1089, lr_0 = 2.0903e-04
Loss = 2.1973e-03, PNorm = 170.7651, GNorm = 0.0461, lr_0 = 2.0889e-04
Loss = 3.9715e-03, PNorm = 170.7706, GNorm = 0.0564, lr_0 = 2.0874e-04
Loss = 2.4079e-03, PNorm = 170.7752, GNorm = 0.1817, lr_0 = 2.0860e-04
Loss = 3.4580e-03, PNorm = 170.7796, GNorm = 0.1619, lr_0 = 2.0846e-04
Loss = 2.4325e-03, PNorm = 170.7824, GNorm = 0.2406, lr_0 = 2.0831e-04
Loss = 2.3028e-03, PNorm = 170.7849, GNorm = 0.1327, lr_0 = 2.0817e-04
Loss = 3.8839e-03, PNorm = 170.7893, GNorm = 0.3026, lr_0 = 2.0803e-04
Loss = 3.7545e-03, PNorm = 170.7946, GNorm = 0.3301, lr_0 = 2.0789e-04
Loss = 2.3841e-03, PNorm = 170.7990, GNorm = 0.1616, lr_0 = 2.0774e-04
Loss = 4.9578e-03, PNorm = 170.8055, GNorm = 0.1772, lr_0 = 2.0760e-04
Loss = 2.1447e-03, PNorm = 170.8091, GNorm = 0.0753, lr_0 = 2.0746e-04
Loss = 2.1228e-03, PNorm = 170.8135, GNorm = 0.3236, lr_0 = 2.0732e-04
Loss = 1.6193e-03, PNorm = 170.8176, GNorm = 0.1708, lr_0 = 2.0718e-04
Loss = 2.4565e-03, PNorm = 170.8226, GNorm = 0.4208, lr_0 = 2.0703e-04
Loss = 2.3386e-03, PNorm = 170.8255, GNorm = 0.2165, lr_0 = 2.0689e-04
Loss = 2.4590e-03, PNorm = 170.8311, GNorm = 0.1359, lr_0 = 2.0675e-04
Loss = 1.7252e-03, PNorm = 170.8353, GNorm = 0.1470, lr_0 = 2.0661e-04
Loss = 2.0566e-03, PNorm = 170.8392, GNorm = 0.1622, lr_0 = 2.0647e-04
Loss = 1.8892e-03, PNorm = 170.8426, GNorm = 0.1606, lr_0 = 2.0633e-04
Loss = 2.5323e-03, PNorm = 170.8453, GNorm = 0.1812, lr_0 = 2.0618e-04
Loss = 2.2239e-03, PNorm = 170.8480, GNorm = 0.1241, lr_0 = 2.0604e-04
Loss = 4.3424e-03, PNorm = 170.8506, GNorm = 0.1598, lr_0 = 2.0590e-04
Loss = 3.0305e-03, PNorm = 170.8547, GNorm = 0.1881, lr_0 = 2.0576e-04
Loss = 3.7692e-03, PNorm = 170.8587, GNorm = 0.4470, lr_0 = 2.0562e-04
Loss = 1.6168e-03, PNorm = 170.8641, GNorm = 0.0534, lr_0 = 2.0548e-04
Loss = 2.6118e-03, PNorm = 170.8674, GNorm = 0.1803, lr_0 = 2.0534e-04
Loss = 2.1358e-03, PNorm = 170.8712, GNorm = 0.1342, lr_0 = 2.0520e-04
Loss = 2.4616e-03, PNorm = 170.8747, GNorm = 0.1113, lr_0 = 2.0506e-04
Loss = 2.0716e-03, PNorm = 170.8788, GNorm = 0.1318, lr_0 = 2.0492e-04
Loss = 2.8506e-03, PNorm = 170.8825, GNorm = 0.1240, lr_0 = 2.0478e-04
Loss = 2.8598e-03, PNorm = 170.8885, GNorm = 0.0837, lr_0 = 2.0464e-04
Loss = 1.8801e-03, PNorm = 170.8948, GNorm = 0.2614, lr_0 = 2.0450e-04
Loss = 3.8559e-03, PNorm = 170.9002, GNorm = 0.2021, lr_0 = 2.0436e-04
Loss = 2.5574e-03, PNorm = 170.9036, GNorm = 0.2473, lr_0 = 2.0422e-04
Loss = 2.9189e-03, PNorm = 170.9040, GNorm = 0.0946, lr_0 = 2.0408e-04
Loss = 2.7598e-03, PNorm = 170.9077, GNorm = 0.1452, lr_0 = 2.0394e-04
Loss = 5.0199e-03, PNorm = 170.9122, GNorm = 0.0640, lr_0 = 2.0380e-04
Loss = 2.9479e-03, PNorm = 170.9177, GNorm = 0.1992, lr_0 = 2.0366e-04
Loss = 2.3447e-03, PNorm = 170.9205, GNorm = 0.1621, lr_0 = 2.0352e-04
Loss = 5.0351e-03, PNorm = 170.9219, GNorm = 0.5778, lr_0 = 2.0338e-04
Loss = 2.6085e-03, PNorm = 170.9248, GNorm = 0.2339, lr_0 = 2.0324e-04
Loss = 4.7123e-03, PNorm = 170.9299, GNorm = 0.1133, lr_0 = 2.0310e-04
Loss = 3.1666e-03, PNorm = 170.9353, GNorm = 0.1202, lr_0 = 2.0296e-04
Loss = 2.0730e-03, PNorm = 170.9404, GNorm = 0.1108, lr_0 = 2.0282e-04
Loss = 2.7302e-03, PNorm = 170.9437, GNorm = 0.1190, lr_0 = 2.0268e-04
Loss = 2.1456e-03, PNorm = 170.9482, GNorm = 0.1641, lr_0 = 2.0254e-04
Loss = 2.3422e-03, PNorm = 170.9519, GNorm = 0.2028, lr_0 = 2.0240e-04
Loss = 2.3127e-03, PNorm = 170.9570, GNorm = 0.2094, lr_0 = 2.0227e-04
Loss = 1.6706e-03, PNorm = 170.9594, GNorm = 0.0557, lr_0 = 2.0213e-04
Loss = 2.6340e-03, PNorm = 170.9635, GNorm = 0.2354, lr_0 = 2.0199e-04
Loss = 2.9202e-03, PNorm = 170.9633, GNorm = 0.2206, lr_0 = 2.0185e-04
Loss = 2.0570e-03, PNorm = 170.9680, GNorm = 0.1445, lr_0 = 2.0171e-04
Loss = 1.8992e-03, PNorm = 170.9739, GNorm = 0.1087, lr_0 = 2.0157e-04
Loss = 3.0561e-03, PNorm = 170.9800, GNorm = 0.2536, lr_0 = 2.0144e-04
Loss = 2.3859e-03, PNorm = 170.9868, GNorm = 0.1163, lr_0 = 2.0130e-04
Loss = 2.5128e-03, PNorm = 170.9913, GNorm = 0.1540, lr_0 = 2.0116e-04
Loss = 2.3448e-03, PNorm = 170.9949, GNorm = 0.0855, lr_0 = 2.0102e-04
Loss = 2.2567e-03, PNorm = 170.9995, GNorm = 0.2250, lr_0 = 2.0088e-04
Loss = 2.3871e-03, PNorm = 171.0042, GNorm = 0.2058, lr_0 = 2.0075e-04
Loss = 3.4515e-03, PNorm = 171.0090, GNorm = 0.1027, lr_0 = 2.0061e-04
Loss = 2.1826e-03, PNorm = 171.0139, GNorm = 0.1470, lr_0 = 2.0047e-04
Loss = 3.0067e-03, PNorm = 171.0205, GNorm = 0.3286, lr_0 = 2.0033e-04
Loss = 2.0960e-03, PNorm = 171.0240, GNorm = 0.1184, lr_0 = 2.0020e-04
Loss = 1.9961e-03, PNorm = 171.0289, GNorm = 0.0433, lr_0 = 2.0006e-04
Loss = 2.0166e-03, PNorm = 171.0355, GNorm = 0.2165, lr_0 = 1.9992e-04
Loss = 2.2198e-03, PNorm = 171.0426, GNorm = 0.2403, lr_0 = 1.9979e-04
Loss = 3.3968e-03, PNorm = 171.0471, GNorm = 0.1131, lr_0 = 1.9965e-04
Loss = 2.9577e-03, PNorm = 171.0520, GNorm = 0.2582, lr_0 = 1.9951e-04
Loss = 2.5119e-03, PNorm = 171.0555, GNorm = 0.2028, lr_0 = 1.9938e-04
Loss = 2.9124e-03, PNorm = 171.0587, GNorm = 0.0672, lr_0 = 1.9924e-04
Loss = 4.1667e-03, PNorm = 171.0605, GNorm = 0.2303, lr_0 = 1.9910e-04
Loss = 2.3897e-03, PNorm = 171.0642, GNorm = 0.1145, lr_0 = 1.9897e-04
Loss = 3.4793e-03, PNorm = 171.0672, GNorm = 0.2047, lr_0 = 1.9883e-04
Loss = 2.5708e-03, PNorm = 171.0713, GNorm = 0.1465, lr_0 = 1.9869e-04
Loss = 1.5657e-03, PNorm = 171.0744, GNorm = 0.2963, lr_0 = 1.9856e-04
Loss = 4.2107e-03, PNorm = 171.0780, GNorm = 0.4731, lr_0 = 1.9842e-04
Loss = 2.7093e-03, PNorm = 171.0821, GNorm = 0.3031, lr_0 = 1.9829e-04
Loss = 2.1165e-03, PNorm = 171.0860, GNorm = 0.0477, lr_0 = 1.9815e-04
Loss = 3.0028e-03, PNorm = 171.0906, GNorm = 0.3784, lr_0 = 1.9801e-04
Loss = 2.5713e-03, PNorm = 171.0973, GNorm = 0.1391, lr_0 = 1.9788e-04
Loss = 1.6673e-03, PNorm = 171.1051, GNorm = 0.1150, lr_0 = 1.9774e-04
Loss = 2.2578e-03, PNorm = 171.1112, GNorm = 0.2110, lr_0 = 1.9761e-04
Loss = 2.5757e-03, PNorm = 171.1148, GNorm = 0.1841, lr_0 = 1.9747e-04
Loss = 3.0623e-03, PNorm = 171.1200, GNorm = 0.0823, lr_0 = 1.9734e-04
Loss = 2.2862e-03, PNorm = 171.1224, GNorm = 0.1741, lr_0 = 1.9720e-04
Loss = 2.7477e-03, PNorm = 171.1251, GNorm = 0.1683, lr_0 = 1.9707e-04
Loss = 2.3882e-03, PNorm = 171.1278, GNorm = 0.1946, lr_0 = 1.9693e-04
Loss = 1.8207e-03, PNorm = 171.1312, GNorm = 0.1912, lr_0 = 1.9680e-04
Loss = 2.0099e-03, PNorm = 171.1364, GNorm = 0.0950, lr_0 = 1.9666e-04
Loss = 2.1317e-03, PNorm = 171.1403, GNorm = 0.1389, lr_0 = 1.9653e-04
Loss = 2.3258e-03, PNorm = 171.1456, GNorm = 0.0631, lr_0 = 1.9639e-04
Loss = 3.0974e-03, PNorm = 171.1521, GNorm = 0.0619, lr_0 = 1.9626e-04
Loss = 3.8091e-03, PNorm = 171.1585, GNorm = 0.2563, lr_0 = 1.9612e-04
Loss = 2.0543e-03, PNorm = 171.1629, GNorm = 0.0762, lr_0 = 1.9599e-04
Loss = 2.1188e-03, PNorm = 171.1684, GNorm = 0.0694, lr_0 = 1.9585e-04
Loss = 3.2020e-03, PNorm = 171.1727, GNorm = 0.0885, lr_0 = 1.9572e-04
Loss = 2.4685e-03, PNorm = 171.1782, GNorm = 0.2304, lr_0 = 1.9559e-04
Loss = 3.6878e-03, PNorm = 171.1829, GNorm = 0.2608, lr_0 = 1.9545e-04
Loss = 2.9888e-03, PNorm = 171.1875, GNorm = 0.0793, lr_0 = 1.9532e-04
Loss = 2.7412e-03, PNorm = 171.1909, GNorm = 0.1291, lr_0 = 1.9518e-04
Loss = 2.2921e-03, PNorm = 171.1958, GNorm = 0.3593, lr_0 = 1.9505e-04
Loss = 2.0060e-03, PNorm = 171.2005, GNorm = 0.0931, lr_0 = 1.9492e-04
Loss = 2.4118e-03, PNorm = 171.2058, GNorm = 0.0485, lr_0 = 1.9478e-04
Loss = 2.3430e-03, PNorm = 171.2101, GNorm = 0.0899, lr_0 = 1.9465e-04
Loss = 2.2285e-03, PNorm = 171.2146, GNorm = 0.0483, lr_0 = 1.9452e-04
Loss = 3.6675e-03, PNorm = 171.2181, GNorm = 0.0498, lr_0 = 1.9438e-04
Loss = 2.5996e-03, PNorm = 171.2225, GNorm = 0.1535, lr_0 = 1.9425e-04
Loss = 1.8865e-03, PNorm = 171.2278, GNorm = 0.1234, lr_0 = 1.9412e-04
Loss = 2.1483e-03, PNorm = 171.2330, GNorm = 0.0998, lr_0 = 1.9398e-04
Loss = 2.1280e-03, PNorm = 171.2361, GNorm = 0.0530, lr_0 = 1.9385e-04
Loss = 3.1115e-03, PNorm = 171.2415, GNorm = 0.1789, lr_0 = 1.9372e-04
Loss = 5.2834e-03, PNorm = 171.2436, GNorm = 0.7717, lr_0 = 1.9359e-04
Loss = 2.1358e-03, PNorm = 171.2514, GNorm = 0.1802, lr_0 = 1.9345e-04
Loss = 2.8736e-03, PNorm = 171.2566, GNorm = 0.1398, lr_0 = 1.9332e-04
Loss = 2.9265e-03, PNorm = 171.2604, GNorm = 0.0824, lr_0 = 1.9319e-04
Loss = 4.4969e-03, PNorm = 171.2642, GNorm = 0.0740, lr_0 = 1.9306e-04
Validation mae = 0.278368
Epoch 22
Loss = 1.6108e-03, PNorm = 171.2661, GNorm = 0.1261, lr_0 = 1.9292e-04
Loss = 2.8429e-03, PNorm = 171.2682, GNorm = 0.0848, lr_0 = 1.9279e-04
Loss = 1.9519e-03, PNorm = 171.2717, GNorm = 0.0997, lr_0 = 1.9266e-04
Loss = 1.9891e-03, PNorm = 171.2746, GNorm = 0.1228, lr_0 = 1.9253e-04
Loss = 3.0750e-03, PNorm = 171.2772, GNorm = 0.1708, lr_0 = 1.9240e-04
Loss = 2.8931e-03, PNorm = 171.2811, GNorm = 0.1401, lr_0 = 1.9226e-04
Loss = 2.2738e-03, PNorm = 171.2847, GNorm = 0.1068, lr_0 = 1.9213e-04
Loss = 1.9783e-03, PNorm = 171.2878, GNorm = 0.1119, lr_0 = 1.9200e-04
Loss = 2.3917e-03, PNorm = 171.2920, GNorm = 0.0765, lr_0 = 1.9187e-04
Loss = 1.8879e-03, PNorm = 171.2979, GNorm = 0.2437, lr_0 = 1.9174e-04
Loss = 1.6013e-03, PNorm = 171.3004, GNorm = 0.0407, lr_0 = 1.9161e-04
Loss = 2.2470e-03, PNorm = 171.3026, GNorm = 0.1282, lr_0 = 1.9148e-04
Loss = 1.5042e-03, PNorm = 171.3042, GNorm = 0.0680, lr_0 = 1.9134e-04
Loss = 1.5123e-03, PNorm = 171.3066, GNorm = 0.1475, lr_0 = 1.9121e-04
Loss = 1.7205e-03, PNorm = 171.3104, GNorm = 0.1691, lr_0 = 1.9108e-04
Loss = 2.9631e-03, PNorm = 171.3158, GNorm = 0.0630, lr_0 = 1.9095e-04
Loss = 2.3765e-03, PNorm = 171.3204, GNorm = 0.2613, lr_0 = 1.9082e-04
Loss = 1.6167e-03, PNorm = 171.3239, GNorm = 0.1990, lr_0 = 1.9069e-04
Loss = 4.6292e-03, PNorm = 171.3246, GNorm = 0.3031, lr_0 = 1.9056e-04
Loss = 2.9113e-03, PNorm = 171.3276, GNorm = 0.1915, lr_0 = 1.9043e-04
Loss = 1.8379e-03, PNorm = 171.3306, GNorm = 0.2392, lr_0 = 1.9030e-04
Loss = 2.4374e-03, PNorm = 171.3336, GNorm = 0.1411, lr_0 = 1.9017e-04
Loss = 1.5749e-03, PNorm = 171.3356, GNorm = 0.2596, lr_0 = 1.9004e-04
Loss = 2.0230e-03, PNorm = 171.3379, GNorm = 0.1564, lr_0 = 1.8991e-04
Loss = 1.6792e-03, PNorm = 171.3415, GNorm = 0.1964, lr_0 = 1.8978e-04
Loss = 2.3504e-03, PNorm = 171.3470, GNorm = 0.1451, lr_0 = 1.8965e-04
Loss = 1.6796e-03, PNorm = 171.3508, GNorm = 0.1976, lr_0 = 1.8952e-04
Loss = 1.8904e-03, PNorm = 171.3539, GNorm = 0.1422, lr_0 = 1.8939e-04
Loss = 1.9250e-03, PNorm = 171.3576, GNorm = 0.0886, lr_0 = 1.8926e-04
Loss = 1.4856e-03, PNorm = 171.3613, GNorm = 0.0403, lr_0 = 1.8913e-04
Loss = 2.3491e-03, PNorm = 171.3635, GNorm = 0.0661, lr_0 = 1.8900e-04
Loss = 2.1013e-03, PNorm = 171.3660, GNorm = 0.2204, lr_0 = 1.8887e-04
Loss = 1.7576e-03, PNorm = 171.3711, GNorm = 0.1889, lr_0 = 1.8874e-04
Loss = 2.3910e-03, PNorm = 171.3754, GNorm = 0.1634, lr_0 = 1.8861e-04
Loss = 2.4147e-03, PNorm = 171.3802, GNorm = 0.0734, lr_0 = 1.8848e-04
Loss = 1.6424e-03, PNorm = 171.3838, GNorm = 0.1480, lr_0 = 1.8835e-04
Loss = 2.5848e-03, PNorm = 171.3868, GNorm = 0.1046, lr_0 = 1.8822e-04
Loss = 2.9051e-03, PNorm = 171.3887, GNorm = 0.2064, lr_0 = 1.8809e-04
Loss = 1.6496e-03, PNorm = 171.3917, GNorm = 0.2201, lr_0 = 1.8797e-04
Loss = 1.9234e-03, PNorm = 171.3956, GNorm = 0.0345, lr_0 = 1.8784e-04
Loss = 2.7331e-03, PNorm = 171.3991, GNorm = 0.1061, lr_0 = 1.8771e-04
Loss = 1.8941e-03, PNorm = 171.4023, GNorm = 0.0891, lr_0 = 1.8758e-04
Loss = 2.3022e-03, PNorm = 171.4035, GNorm = 0.2599, lr_0 = 1.8745e-04
Loss = 3.6607e-03, PNorm = 171.4056, GNorm = 0.0666, lr_0 = 1.8732e-04
Loss = 4.3832e-03, PNorm = 171.4105, GNorm = 0.1660, lr_0 = 1.8719e-04
Loss = 2.5455e-03, PNorm = 171.4148, GNorm = 0.0947, lr_0 = 1.8707e-04
Loss = 2.4370e-03, PNorm = 171.4204, GNorm = 0.0679, lr_0 = 1.8694e-04
Loss = 3.8190e-03, PNorm = 171.4234, GNorm = 0.0602, lr_0 = 1.8681e-04
Loss = 1.8195e-03, PNorm = 171.4265, GNorm = 0.2117, lr_0 = 1.8668e-04
Loss = 1.8922e-03, PNorm = 171.4306, GNorm = 0.0930, lr_0 = 1.8655e-04
Loss = 1.9483e-03, PNorm = 171.4366, GNorm = 0.1584, lr_0 = 1.8643e-04
Loss = 1.5985e-03, PNorm = 171.4397, GNorm = 0.1099, lr_0 = 1.8630e-04
Loss = 2.2280e-03, PNorm = 171.4446, GNorm = 0.0306, lr_0 = 1.8617e-04
Loss = 1.6199e-03, PNorm = 171.4473, GNorm = 0.0435, lr_0 = 1.8604e-04
Loss = 1.9215e-03, PNorm = 171.4517, GNorm = 0.1537, lr_0 = 1.8592e-04
Loss = 3.0020e-03, PNorm = 171.4534, GNorm = 0.1805, lr_0 = 1.8579e-04
Loss = 2.4601e-03, PNorm = 171.4579, GNorm = 0.1953, lr_0 = 1.8566e-04
Loss = 2.4914e-03, PNorm = 171.4598, GNorm = 0.2224, lr_0 = 1.8553e-04
Loss = 3.3489e-03, PNorm = 171.4629, GNorm = 0.2091, lr_0 = 1.8541e-04
Loss = 1.9821e-03, PNorm = 171.4671, GNorm = 0.0394, lr_0 = 1.8528e-04
Loss = 2.6884e-03, PNorm = 171.4724, GNorm = 0.2869, lr_0 = 1.8515e-04
Loss = 2.0016e-03, PNorm = 171.4768, GNorm = 0.0456, lr_0 = 1.8503e-04
Loss = 2.2991e-03, PNorm = 171.4821, GNorm = 0.0770, lr_0 = 1.8490e-04
Loss = 1.7148e-03, PNorm = 171.4855, GNorm = 0.1704, lr_0 = 1.8477e-04
Loss = 2.9169e-03, PNorm = 171.4902, GNorm = 0.2105, lr_0 = 1.8465e-04
Loss = 3.4697e-03, PNorm = 171.4927, GNorm = 0.1584, lr_0 = 1.8452e-04
Loss = 3.2334e-03, PNorm = 171.4966, GNorm = 0.1082, lr_0 = 1.8439e-04
Loss = 2.2727e-03, PNorm = 171.4982, GNorm = 0.1948, lr_0 = 1.8427e-04
Loss = 2.3802e-03, PNorm = 171.5006, GNorm = 0.1512, lr_0 = 1.8414e-04
Loss = 2.2875e-03, PNorm = 171.5037, GNorm = 0.1179, lr_0 = 1.8401e-04
Loss = 3.3717e-03, PNorm = 171.5069, GNorm = 0.0753, lr_0 = 1.8389e-04
Loss = 1.8900e-03, PNorm = 171.5108, GNorm = 0.1612, lr_0 = 1.8376e-04
Loss = 2.0463e-03, PNorm = 171.5178, GNorm = 0.0875, lr_0 = 1.8364e-04
Loss = 1.5860e-03, PNorm = 171.5242, GNorm = 0.0874, lr_0 = 1.8351e-04
Loss = 2.1085e-03, PNorm = 171.5297, GNorm = 0.1565, lr_0 = 1.8338e-04
Loss = 1.8201e-03, PNorm = 171.5351, GNorm = 0.2258, lr_0 = 1.8326e-04
Loss = 2.6886e-03, PNorm = 171.5375, GNorm = 0.1177, lr_0 = 1.8313e-04
Loss = 1.8042e-03, PNorm = 171.5414, GNorm = 0.0821, lr_0 = 1.8301e-04
Loss = 1.9484e-03, PNorm = 171.5447, GNorm = 0.0955, lr_0 = 1.8288e-04
Loss = 3.5907e-03, PNorm = 171.5458, GNorm = 0.2714, lr_0 = 1.8276e-04
Loss = 2.3592e-03, PNorm = 171.5489, GNorm = 0.1128, lr_0 = 1.8263e-04
Loss = 1.7010e-03, PNorm = 171.5521, GNorm = 0.1380, lr_0 = 1.8251e-04
Loss = 1.8938e-03, PNorm = 171.5562, GNorm = 0.1591, lr_0 = 1.8238e-04
Loss = 2.2176e-03, PNorm = 171.5604, GNorm = 0.1399, lr_0 = 1.8226e-04
Loss = 3.2080e-03, PNorm = 171.5659, GNorm = 0.0746, lr_0 = 1.8213e-04
Loss = 1.7362e-03, PNorm = 171.5701, GNorm = 0.0922, lr_0 = 1.8201e-04
Loss = 7.1182e-03, PNorm = 171.5731, GNorm = 0.0412, lr_0 = 1.8188e-04
Loss = 2.1918e-03, PNorm = 171.5762, GNorm = 0.1770, lr_0 = 1.8176e-04
Loss = 2.2534e-03, PNorm = 171.5787, GNorm = 0.1718, lr_0 = 1.8163e-04
Loss = 2.3504e-03, PNorm = 171.5838, GNorm = 0.1979, lr_0 = 1.8151e-04
Loss = 1.7003e-03, PNorm = 171.5880, GNorm = 0.1497, lr_0 = 1.8138e-04
Loss = 2.1182e-03, PNorm = 171.5912, GNorm = 0.1729, lr_0 = 1.8126e-04
Loss = 1.3543e-03, PNorm = 171.5942, GNorm = 0.1435, lr_0 = 1.8114e-04
Loss = 1.4828e-03, PNorm = 171.5975, GNorm = 0.2753, lr_0 = 1.8101e-04
Loss = 2.0786e-03, PNorm = 171.6015, GNorm = 0.0987, lr_0 = 1.8089e-04
Loss = 2.2209e-03, PNorm = 171.6052, GNorm = 0.1221, lr_0 = 1.8076e-04
Loss = 1.4373e-03, PNorm = 171.6084, GNorm = 0.1634, lr_0 = 1.8064e-04
Loss = 1.5732e-03, PNorm = 171.6104, GNorm = 0.1424, lr_0 = 1.8052e-04
Loss = 3.9773e-03, PNorm = 171.6128, GNorm = 0.1432, lr_0 = 1.8039e-04
Loss = 2.8272e-03, PNorm = 171.6155, GNorm = 0.2102, lr_0 = 1.8027e-04
Loss = 2.2562e-03, PNorm = 171.6177, GNorm = 0.1039, lr_0 = 1.8015e-04
Loss = 4.1571e-03, PNorm = 171.6211, GNorm = 0.0401, lr_0 = 1.8002e-04
Loss = 2.2973e-03, PNorm = 171.6267, GNorm = 0.2949, lr_0 = 1.7990e-04
Loss = 4.3473e-03, PNorm = 171.6304, GNorm = 0.1842, lr_0 = 1.7978e-04
Loss = 2.5564e-03, PNorm = 171.6342, GNorm = 0.1175, lr_0 = 1.7965e-04
Loss = 1.6510e-03, PNorm = 171.6383, GNorm = 0.1058, lr_0 = 1.7953e-04
Loss = 1.5632e-03, PNorm = 171.6434, GNorm = 0.0583, lr_0 = 1.7941e-04
Loss = 4.2215e-03, PNorm = 171.6487, GNorm = 0.1054, lr_0 = 1.7928e-04
Loss = 2.3754e-03, PNorm = 171.6519, GNorm = 0.1559, lr_0 = 1.7916e-04
Loss = 3.8973e-03, PNorm = 171.6554, GNorm = 0.1543, lr_0 = 1.7904e-04
Loss = 3.6216e-03, PNorm = 171.6581, GNorm = 0.1466, lr_0 = 1.7892e-04
Loss = 1.9571e-03, PNorm = 171.6630, GNorm = 0.1450, lr_0 = 1.7879e-04
Loss = 1.5498e-03, PNorm = 171.6681, GNorm = 0.0759, lr_0 = 1.7867e-04
Loss = 1.9307e-03, PNorm = 171.6725, GNorm = 0.1044, lr_0 = 1.7855e-04
Loss = 2.1375e-03, PNorm = 171.6763, GNorm = 0.1148, lr_0 = 1.7843e-04
Loss = 1.7262e-03, PNorm = 171.6792, GNorm = 0.1325, lr_0 = 1.7830e-04
Loss = 3.6300e-03, PNorm = 171.6825, GNorm = 0.1007, lr_0 = 1.7818e-04
Loss = 3.7512e-03, PNorm = 171.6872, GNorm = 0.2223, lr_0 = 1.7806e-04
Loss = 2.1166e-03, PNorm = 171.6913, GNorm = 0.2691, lr_0 = 1.7794e-04
Loss = 1.5294e-03, PNorm = 171.6962, GNorm = 0.2028, lr_0 = 1.7782e-04
Validation mae = 0.278161
Epoch 23
Loss = 1.3346e-03, PNorm = 171.6997, GNorm = 0.0814, lr_0 = 1.7769e-04
Loss = 2.0828e-03, PNorm = 171.7021, GNorm = 0.2046, lr_0 = 1.7757e-04
Loss = 1.9191e-03, PNorm = 171.7051, GNorm = 0.1809, lr_0 = 1.7745e-04
Loss = 2.0689e-03, PNorm = 171.7072, GNorm = 0.1664, lr_0 = 1.7733e-04
Loss = 1.5107e-03, PNorm = 171.7100, GNorm = 0.1317, lr_0 = 1.7721e-04
Loss = 2.1582e-03, PNorm = 171.7124, GNorm = 0.0862, lr_0 = 1.7709e-04
Loss = 1.1916e-03, PNorm = 171.7151, GNorm = 0.0673, lr_0 = 1.7696e-04
Loss = 2.2877e-03, PNorm = 171.7183, GNorm = 0.0937, lr_0 = 1.7684e-04
Loss = 3.4385e-03, PNorm = 171.7229, GNorm = 0.1717, lr_0 = 1.7672e-04
Loss = 1.5299e-03, PNorm = 171.7278, GNorm = 0.0514, lr_0 = 1.7660e-04
Loss = 1.7961e-03, PNorm = 171.7299, GNorm = 0.1159, lr_0 = 1.7648e-04
Loss = 1.7640e-03, PNorm = 171.7321, GNorm = 0.1064, lr_0 = 1.7636e-04
Loss = 1.9265e-03, PNorm = 171.7357, GNorm = 0.1431, lr_0 = 1.7624e-04
Loss = 2.4622e-03, PNorm = 171.7394, GNorm = 0.1248, lr_0 = 1.7612e-04
Loss = 1.9312e-03, PNorm = 171.7426, GNorm = 0.1339, lr_0 = 1.7600e-04
Loss = 2.0536e-03, PNorm = 171.7445, GNorm = 0.0965, lr_0 = 1.7588e-04
Loss = 2.1913e-03, PNorm = 171.7456, GNorm = 0.1276, lr_0 = 1.7576e-04
Loss = 1.2468e-03, PNorm = 171.7470, GNorm = 0.0698, lr_0 = 1.7564e-04
Loss = 2.6360e-03, PNorm = 171.7495, GNorm = 0.0852, lr_0 = 1.7552e-04
Loss = 2.8415e-03, PNorm = 171.7513, GNorm = 0.0499, lr_0 = 1.7540e-04
Loss = 2.2353e-03, PNorm = 171.7535, GNorm = 0.0836, lr_0 = 1.7528e-04
Loss = 1.5599e-03, PNorm = 171.7546, GNorm = 0.1283, lr_0 = 1.7516e-04
Loss = 1.4701e-03, PNorm = 171.7567, GNorm = 0.0708, lr_0 = 1.7504e-04
Loss = 2.8032e-03, PNorm = 171.7573, GNorm = 0.2141, lr_0 = 1.7492e-04
Loss = 1.9186e-03, PNorm = 171.7593, GNorm = 0.0595, lr_0 = 1.7480e-04
Loss = 1.2911e-03, PNorm = 171.7625, GNorm = 0.1606, lr_0 = 1.7468e-04
Loss = 1.8572e-03, PNorm = 171.7652, GNorm = 0.1554, lr_0 = 1.7456e-04
Loss = 1.6011e-03, PNorm = 171.7670, GNorm = 0.0438, lr_0 = 1.7444e-04
Loss = 1.3524e-03, PNorm = 171.7711, GNorm = 0.0774, lr_0 = 1.7432e-04
Loss = 1.5501e-03, PNorm = 171.7740, GNorm = 0.1006, lr_0 = 1.7420e-04
Loss = 2.2263e-03, PNorm = 171.7783, GNorm = 0.1008, lr_0 = 1.7408e-04
Loss = 1.8525e-03, PNorm = 171.7814, GNorm = 0.2043, lr_0 = 1.7396e-04
Loss = 1.4375e-03, PNorm = 171.7847, GNorm = 0.2229, lr_0 = 1.7384e-04
Loss = 1.5083e-03, PNorm = 171.7882, GNorm = 0.1519, lr_0 = 1.7372e-04
Loss = 1.9147e-03, PNorm = 171.7922, GNorm = 0.0379, lr_0 = 1.7360e-04
Loss = 1.4320e-03, PNorm = 171.7955, GNorm = 0.1114, lr_0 = 1.7348e-04
Loss = 3.1013e-03, PNorm = 171.7995, GNorm = 0.1234, lr_0 = 1.7336e-04
Loss = 1.9569e-03, PNorm = 171.8033, GNorm = 0.0390, lr_0 = 1.7325e-04
Loss = 3.3032e-03, PNorm = 171.8088, GNorm = 0.2272, lr_0 = 1.7313e-04
Loss = 2.5515e-03, PNorm = 171.8125, GNorm = 0.1873, lr_0 = 1.7301e-04
Loss = 1.6788e-03, PNorm = 171.8138, GNorm = 0.1038, lr_0 = 1.7289e-04
Loss = 1.3382e-03, PNorm = 171.8144, GNorm = 0.1275, lr_0 = 1.7277e-04
Loss = 2.7179e-03, PNorm = 171.8162, GNorm = 0.0743, lr_0 = 1.7265e-04
Loss = 1.6477e-03, PNorm = 171.8184, GNorm = 0.0730, lr_0 = 1.7253e-04
Loss = 2.1637e-03, PNorm = 171.8227, GNorm = 0.0483, lr_0 = 1.7242e-04
Loss = 2.1225e-03, PNorm = 171.8247, GNorm = 0.0535, lr_0 = 1.7230e-04
Loss = 1.9349e-03, PNorm = 171.8271, GNorm = 0.1644, lr_0 = 1.7218e-04
Loss = 1.9455e-03, PNorm = 171.8315, GNorm = 0.0631, lr_0 = 1.7206e-04
Loss = 1.6279e-03, PNorm = 171.8361, GNorm = 0.0670, lr_0 = 1.7194e-04
Loss = 1.5665e-03, PNorm = 171.8387, GNorm = 0.0640, lr_0 = 1.7183e-04
Loss = 1.6584e-03, PNorm = 171.8419, GNorm = 0.0533, lr_0 = 1.7171e-04
Loss = 1.9454e-03, PNorm = 171.8443, GNorm = 0.1388, lr_0 = 1.7159e-04
Loss = 5.7455e-03, PNorm = 171.8464, GNorm = 0.1105, lr_0 = 1.7147e-04
Loss = 1.6909e-03, PNorm = 171.8496, GNorm = 0.0821, lr_0 = 1.7136e-04
Loss = 3.1847e-03, PNorm = 171.8526, GNorm = 0.2311, lr_0 = 1.7124e-04
Loss = 2.8074e-03, PNorm = 171.8560, GNorm = 0.0809, lr_0 = 1.7112e-04
Loss = 3.5784e-03, PNorm = 171.8599, GNorm = 0.3890, lr_0 = 1.7100e-04
Loss = 3.5491e-03, PNorm = 171.8632, GNorm = 0.2008, lr_0 = 1.7089e-04
Loss = 3.6667e-03, PNorm = 171.8689, GNorm = 0.2362, lr_0 = 1.7077e-04
Loss = 3.0576e-03, PNorm = 171.8723, GNorm = 0.3170, lr_0 = 1.7065e-04
Loss = 2.4770e-03, PNorm = 171.8764, GNorm = 0.2304, lr_0 = 1.7054e-04
Loss = 2.5418e-03, PNorm = 171.8802, GNorm = 0.1410, lr_0 = 1.7042e-04
Loss = 1.5752e-03, PNorm = 171.8833, GNorm = 0.3360, lr_0 = 1.7030e-04
Loss = 5.4550e-03, PNorm = 171.8873, GNorm = 0.0463, lr_0 = 1.7019e-04
Loss = 1.7896e-03, PNorm = 171.8905, GNorm = 0.1794, lr_0 = 1.7007e-04
Loss = 1.6651e-03, PNorm = 171.8945, GNorm = 0.1887, lr_0 = 1.6995e-04
Loss = 2.1727e-03, PNorm = 171.8975, GNorm = 0.1228, lr_0 = 1.6984e-04
Loss = 2.1561e-03, PNorm = 171.9002, GNorm = 0.2318, lr_0 = 1.6972e-04
Loss = 1.5346e-03, PNorm = 171.9029, GNorm = 0.0725, lr_0 = 1.6960e-04
Loss = 1.9768e-03, PNorm = 171.9044, GNorm = 0.1417, lr_0 = 1.6949e-04
Loss = 1.5391e-03, PNorm = 171.9079, GNorm = 0.1175, lr_0 = 1.6937e-04
Loss = 1.5643e-03, PNorm = 171.9107, GNorm = 0.0873, lr_0 = 1.6926e-04
Loss = 2.0563e-03, PNorm = 171.9145, GNorm = 0.1468, lr_0 = 1.6914e-04
Loss = 1.5099e-03, PNorm = 171.9183, GNorm = 0.2363, lr_0 = 1.6902e-04
Loss = 1.3588e-03, PNorm = 171.9215, GNorm = 0.0906, lr_0 = 1.6891e-04
Loss = 2.4771e-03, PNorm = 171.9240, GNorm = 0.1520, lr_0 = 1.6879e-04
Loss = 1.6830e-03, PNorm = 171.9276, GNorm = 0.0551, lr_0 = 1.6868e-04
Loss = 2.1774e-03, PNorm = 171.9320, GNorm = 0.1885, lr_0 = 1.6856e-04
Loss = 1.3090e-03, PNorm = 171.9332, GNorm = 0.1368, lr_0 = 1.6845e-04
Loss = 2.3313e-03, PNorm = 171.9347, GNorm = 0.0584, lr_0 = 1.6833e-04
Loss = 1.4636e-03, PNorm = 171.9359, GNorm = 0.0644, lr_0 = 1.6821e-04
Loss = 1.9719e-03, PNorm = 171.9398, GNorm = 0.1078, lr_0 = 1.6810e-04
Loss = 2.9985e-03, PNorm = 171.9441, GNorm = 0.0960, lr_0 = 1.6798e-04
Loss = 1.5339e-03, PNorm = 171.9476, GNorm = 0.1141, lr_0 = 1.6787e-04
Loss = 1.6603e-03, PNorm = 171.9489, GNorm = 0.2038, lr_0 = 1.6775e-04
Loss = 1.8965e-03, PNorm = 171.9514, GNorm = 0.0896, lr_0 = 1.6764e-04
Loss = 1.6234e-03, PNorm = 171.9557, GNorm = 0.1476, lr_0 = 1.6752e-04
Loss = 1.1753e-03, PNorm = 171.9593, GNorm = 0.1595, lr_0 = 1.6741e-04
Loss = 2.1918e-03, PNorm = 171.9618, GNorm = 0.0437, lr_0 = 1.6729e-04
Loss = 2.2077e-03, PNorm = 171.9624, GNorm = 0.0968, lr_0 = 1.6718e-04
Loss = 3.2082e-03, PNorm = 171.9649, GNorm = 0.1679, lr_0 = 1.6707e-04
Loss = 1.7012e-03, PNorm = 171.9693, GNorm = 0.1230, lr_0 = 1.6695e-04
Loss = 1.2131e-03, PNorm = 171.9750, GNorm = 0.0965, lr_0 = 1.6684e-04
Loss = 1.5730e-03, PNorm = 171.9809, GNorm = 0.0735, lr_0 = 1.6672e-04
Loss = 4.1886e-03, PNorm = 171.9868, GNorm = 0.0936, lr_0 = 1.6661e-04
Loss = 1.5435e-03, PNorm = 171.9914, GNorm = 0.0562, lr_0 = 1.6649e-04
Loss = 2.0259e-03, PNorm = 171.9953, GNorm = 0.0668, lr_0 = 1.6638e-04
Loss = 1.6547e-03, PNorm = 171.9986, GNorm = 0.1230, lr_0 = 1.6627e-04
Loss = 2.6894e-03, PNorm = 172.0019, GNorm = 0.1608, lr_0 = 1.6615e-04
Loss = 1.9739e-03, PNorm = 172.0061, GNorm = 0.1559, lr_0 = 1.6604e-04
Loss = 2.7275e-03, PNorm = 172.0104, GNorm = 0.0617, lr_0 = 1.6592e-04
Loss = 1.3147e-03, PNorm = 172.0140, GNorm = 0.2112, lr_0 = 1.6581e-04
Loss = 1.6117e-03, PNorm = 172.0174, GNorm = 0.1718, lr_0 = 1.6570e-04
Loss = 1.3460e-03, PNorm = 172.0195, GNorm = 0.1039, lr_0 = 1.6558e-04
Loss = 2.6721e-03, PNorm = 172.0220, GNorm = 0.1519, lr_0 = 1.6547e-04
Loss = 2.3546e-03, PNorm = 172.0262, GNorm = 0.1855, lr_0 = 1.6536e-04
Loss = 1.6617e-03, PNorm = 172.0286, GNorm = 0.1265, lr_0 = 1.6524e-04
Loss = 2.4593e-03, PNorm = 172.0335, GNorm = 0.0886, lr_0 = 1.6513e-04
Loss = 1.7928e-03, PNorm = 172.0372, GNorm = 0.1244, lr_0 = 1.6502e-04
Loss = 1.2862e-03, PNorm = 172.0411, GNorm = 0.2687, lr_0 = 1.6490e-04
Loss = 2.3946e-03, PNorm = 172.0465, GNorm = 0.1764, lr_0 = 1.6479e-04
Loss = 2.5013e-03, PNorm = 172.0492, GNorm = 0.0849, lr_0 = 1.6468e-04
Loss = 2.9093e-03, PNorm = 172.0496, GNorm = 0.0580, lr_0 = 1.6457e-04
Loss = 3.6134e-03, PNorm = 172.0503, GNorm = 0.0685, lr_0 = 1.6445e-04
Loss = 1.9267e-03, PNorm = 172.0531, GNorm = 0.2049, lr_0 = 1.6434e-04
Loss = 1.6424e-03, PNorm = 172.0572, GNorm = 0.0862, lr_0 = 1.6423e-04
Loss = 2.9702e-03, PNorm = 172.0626, GNorm = 0.1219, lr_0 = 1.6412e-04
Loss = 3.0820e-03, PNorm = 172.0663, GNorm = 0.1942, lr_0 = 1.6400e-04
Loss = 2.4316e-03, PNorm = 172.0687, GNorm = 0.1625, lr_0 = 1.6389e-04
Loss = 1.1993e-03, PNorm = 172.0727, GNorm = 0.0729, lr_0 = 1.6378e-04
Validation mae = 0.278229
Epoch 24
Loss = 1.6769e-03, PNorm = 172.0747, GNorm = 0.1400, lr_0 = 1.6367e-04
Loss = 3.2951e-03, PNorm = 172.0762, GNorm = 0.0505, lr_0 = 1.6355e-04
Loss = 1.4654e-03, PNorm = 172.0777, GNorm = 0.1050, lr_0 = 1.6344e-04
Loss = 2.3834e-03, PNorm = 172.0805, GNorm = 0.1732, lr_0 = 1.6333e-04
Loss = 1.3964e-03, PNorm = 172.0829, GNorm = 0.0486, lr_0 = 1.6322e-04
Loss = 2.6737e-03, PNorm = 172.0843, GNorm = 0.0360, lr_0 = 1.6311e-04
Loss = 3.3190e-03, PNorm = 172.0859, GNorm = 0.1308, lr_0 = 1.6299e-04
Loss = 1.4703e-03, PNorm = 172.0881, GNorm = 0.0678, lr_0 = 1.6288e-04
Loss = 2.0585e-03, PNorm = 172.0913, GNorm = 0.2171, lr_0 = 1.6277e-04
Loss = 1.6510e-03, PNorm = 172.0934, GNorm = 0.1198, lr_0 = 1.6266e-04
Loss = 1.4764e-03, PNorm = 172.0952, GNorm = 0.0551, lr_0 = 1.6255e-04
Loss = 1.4323e-03, PNorm = 172.0982, GNorm = 0.1280, lr_0 = 1.6244e-04
Loss = 2.3203e-03, PNorm = 172.1003, GNorm = 0.0742, lr_0 = 1.6233e-04
Loss = 1.3487e-03, PNorm = 172.1038, GNorm = 0.1186, lr_0 = 1.6221e-04
Loss = 1.5561e-03, PNorm = 172.1065, GNorm = 0.0706, lr_0 = 1.6210e-04
Loss = 1.2689e-03, PNorm = 172.1080, GNorm = 0.1513, lr_0 = 1.6199e-04
Loss = 1.2585e-03, PNorm = 172.1095, GNorm = 0.1249, lr_0 = 1.6188e-04
Loss = 1.5130e-03, PNorm = 172.1119, GNorm = 0.1575, lr_0 = 1.6177e-04
Loss = 1.1196e-03, PNorm = 172.1145, GNorm = 0.0603, lr_0 = 1.6166e-04
Loss = 1.1787e-03, PNorm = 172.1172, GNorm = 0.1248, lr_0 = 1.6155e-04
Loss = 1.2061e-03, PNorm = 172.1191, GNorm = 0.0343, lr_0 = 1.6144e-04
Loss = 1.6189e-03, PNorm = 172.1229, GNorm = 0.0617, lr_0 = 1.6133e-04
Loss = 1.3903e-03, PNorm = 172.1267, GNorm = 0.0656, lr_0 = 1.6122e-04
Loss = 1.8870e-03, PNorm = 172.1290, GNorm = 0.0564, lr_0 = 1.6111e-04
Loss = 2.3417e-03, PNorm = 172.1329, GNorm = 0.0361, lr_0 = 1.6100e-04
Loss = 1.5278e-03, PNorm = 172.1336, GNorm = 0.0772, lr_0 = 1.6089e-04
Loss = 4.1575e-03, PNorm = 172.1360, GNorm = 0.1279, lr_0 = 1.6078e-04
Loss = 1.2045e-03, PNorm = 172.1387, GNorm = 0.0559, lr_0 = 1.6067e-04
Loss = 1.1884e-03, PNorm = 172.1406, GNorm = 0.1383, lr_0 = 1.6056e-04
Loss = 1.5163e-03, PNorm = 172.1432, GNorm = 0.1148, lr_0 = 1.6045e-04
Loss = 1.7983e-03, PNorm = 172.1466, GNorm = 0.0859, lr_0 = 1.6034e-04
Loss = 1.2177e-03, PNorm = 172.1493, GNorm = 0.0636, lr_0 = 1.6023e-04
Loss = 1.2159e-03, PNorm = 172.1510, GNorm = 0.1043, lr_0 = 1.6012e-04
Loss = 3.6748e-03, PNorm = 172.1530, GNorm = 0.0357, lr_0 = 1.6001e-04
Loss = 3.2780e-03, PNorm = 172.1570, GNorm = 0.3777, lr_0 = 1.5990e-04
Loss = 1.3854e-03, PNorm = 172.1605, GNorm = 0.0706, lr_0 = 1.5979e-04
Loss = 1.4524e-03, PNorm = 172.1649, GNorm = 0.0448, lr_0 = 1.5968e-04
Loss = 2.3817e-03, PNorm = 172.1690, GNorm = 0.1739, lr_0 = 1.5957e-04
Loss = 1.4500e-03, PNorm = 172.1720, GNorm = 0.0664, lr_0 = 1.5946e-04
Loss = 1.4728e-03, PNorm = 172.1735, GNorm = 0.1400, lr_0 = 1.5935e-04
Loss = 1.9954e-03, PNorm = 172.1759, GNorm = 0.1570, lr_0 = 1.5924e-04
Loss = 1.8277e-03, PNorm = 172.1774, GNorm = 0.1048, lr_0 = 1.5913e-04
Loss = 1.9907e-03, PNorm = 172.1809, GNorm = 0.3218, lr_0 = 1.5902e-04
Loss = 2.8560e-03, PNorm = 172.1846, GNorm = 0.0493, lr_0 = 1.5891e-04
Loss = 1.2459e-03, PNorm = 172.1872, GNorm = 0.1528, lr_0 = 1.5880e-04
Loss = 1.6781e-03, PNorm = 172.1888, GNorm = 0.0965, lr_0 = 1.5870e-04
Loss = 3.7777e-03, PNorm = 172.1892, GNorm = 0.0437, lr_0 = 1.5859e-04
Loss = 1.7355e-03, PNorm = 172.1924, GNorm = 0.0433, lr_0 = 1.5848e-04
Loss = 1.3034e-03, PNorm = 172.1961, GNorm = 0.1353, lr_0 = 1.5837e-04
Loss = 2.7156e-03, PNorm = 172.2000, GNorm = 0.0496, lr_0 = 1.5826e-04
Loss = 2.7876e-03, PNorm = 172.2023, GNorm = 0.1294, lr_0 = 1.5815e-04
Loss = 2.2932e-03, PNorm = 172.2056, GNorm = 0.1054, lr_0 = 1.5804e-04
Loss = 1.1897e-03, PNorm = 172.2089, GNorm = 0.0646, lr_0 = 1.5794e-04
Loss = 1.1119e-03, PNorm = 172.2108, GNorm = 0.1000, lr_0 = 1.5783e-04
Loss = 1.3801e-03, PNorm = 172.2137, GNorm = 0.0324, lr_0 = 1.5772e-04
Loss = 1.4071e-03, PNorm = 172.2177, GNorm = 0.0897, lr_0 = 1.5761e-04
Loss = 2.0014e-03, PNorm = 172.2202, GNorm = 0.1150, lr_0 = 1.5750e-04
Loss = 1.9482e-03, PNorm = 172.2219, GNorm = 0.0733, lr_0 = 1.5740e-04
Loss = 2.4710e-03, PNorm = 172.2228, GNorm = 0.1475, lr_0 = 1.5729e-04
Loss = 1.7785e-03, PNorm = 172.2240, GNorm = 0.0955, lr_0 = 1.5718e-04
Loss = 1.3279e-03, PNorm = 172.2273, GNorm = 0.0898, lr_0 = 1.5707e-04
Loss = 1.1326e-03, PNorm = 172.2321, GNorm = 0.0921, lr_0 = 1.5697e-04
Loss = 1.2547e-03, PNorm = 172.2351, GNorm = 0.1548, lr_0 = 1.5686e-04
Loss = 1.5237e-03, PNorm = 172.2374, GNorm = 0.0984, lr_0 = 1.5675e-04
Loss = 2.1144e-03, PNorm = 172.2391, GNorm = 0.0744, lr_0 = 1.5664e-04
Loss = 1.7288e-03, PNorm = 172.2406, GNorm = 0.2382, lr_0 = 1.5654e-04
Loss = 2.2796e-03, PNorm = 172.2433, GNorm = 0.0398, lr_0 = 1.5643e-04
Loss = 1.8779e-03, PNorm = 172.2455, GNorm = 0.0847, lr_0 = 1.5632e-04
Loss = 1.8733e-03, PNorm = 172.2484, GNorm = 0.0455, lr_0 = 1.5621e-04
Loss = 1.5829e-03, PNorm = 172.2509, GNorm = 0.1817, lr_0 = 1.5611e-04
Loss = 1.2081e-03, PNorm = 172.2528, GNorm = 0.1473, lr_0 = 1.5600e-04
Loss = 1.0960e-03, PNorm = 172.2563, GNorm = 0.0759, lr_0 = 1.5589e-04
Loss = 1.9897e-03, PNorm = 172.2600, GNorm = 0.1345, lr_0 = 1.5579e-04
Loss = 1.1033e-03, PNorm = 172.2635, GNorm = 0.1055, lr_0 = 1.5568e-04
Loss = 1.0969e-03, PNorm = 172.2665, GNorm = 0.1110, lr_0 = 1.5557e-04
Loss = 1.5655e-03, PNorm = 172.2707, GNorm = 0.1127, lr_0 = 1.5547e-04
Loss = 2.2672e-03, PNorm = 172.2739, GNorm = 0.1108, lr_0 = 1.5536e-04
Loss = 1.9132e-03, PNorm = 172.2759, GNorm = 0.0781, lr_0 = 1.5525e-04
Loss = 1.8715e-03, PNorm = 172.2788, GNorm = 0.2582, lr_0 = 1.5515e-04
Loss = 1.7527e-03, PNorm = 172.2818, GNorm = 0.1211, lr_0 = 1.5504e-04
Loss = 2.7259e-03, PNorm = 172.2828, GNorm = 0.1214, lr_0 = 1.5493e-04
Loss = 3.2034e-03, PNorm = 172.2843, GNorm = 0.0966, lr_0 = 1.5483e-04
Loss = 1.8503e-03, PNorm = 172.2890, GNorm = 0.1831, lr_0 = 1.5472e-04
Loss = 4.3408e-03, PNorm = 172.2920, GNorm = 0.0855, lr_0 = 1.5462e-04
Loss = 3.2868e-03, PNorm = 172.2953, GNorm = 0.1020, lr_0 = 1.5451e-04
Loss = 1.5841e-03, PNorm = 172.2980, GNorm = 0.0699, lr_0 = 1.5440e-04
Loss = 2.3910e-03, PNorm = 172.3000, GNorm = 0.0940, lr_0 = 1.5430e-04
Loss = 1.4971e-03, PNorm = 172.3031, GNorm = 0.2399, lr_0 = 1.5419e-04
Loss = 1.1258e-03, PNorm = 172.3066, GNorm = 0.0886, lr_0 = 1.5409e-04
Loss = 2.1605e-03, PNorm = 172.3090, GNorm = 0.0455, lr_0 = 1.5398e-04
Loss = 1.4281e-03, PNorm = 172.3113, GNorm = 0.0999, lr_0 = 1.5388e-04
Loss = 2.2293e-03, PNorm = 172.3147, GNorm = 0.1257, lr_0 = 1.5377e-04
Loss = 1.4739e-03, PNorm = 172.3173, GNorm = 0.1786, lr_0 = 1.5367e-04
Loss = 1.6071e-03, PNorm = 172.3195, GNorm = 0.1643, lr_0 = 1.5356e-04
Loss = 3.4172e-03, PNorm = 172.3207, GNorm = 0.3023, lr_0 = 1.5346e-04
Loss = 1.8170e-03, PNorm = 172.3230, GNorm = 0.0506, lr_0 = 1.5335e-04
Loss = 1.4638e-03, PNorm = 172.3255, GNorm = 0.0758, lr_0 = 1.5325e-04
Loss = 1.6594e-03, PNorm = 172.3292, GNorm = 0.0928, lr_0 = 1.5314e-04
Loss = 1.0096e-03, PNorm = 172.3323, GNorm = 0.0614, lr_0 = 1.5304e-04
Loss = 1.2158e-03, PNorm = 172.3349, GNorm = 0.1349, lr_0 = 1.5293e-04
Loss = 1.0758e-03, PNorm = 172.3375, GNorm = 0.0839, lr_0 = 1.5283e-04
Loss = 3.2272e-03, PNorm = 172.3399, GNorm = 0.2325, lr_0 = 1.5272e-04
Loss = 1.8501e-03, PNorm = 172.3416, GNorm = 0.0410, lr_0 = 1.5262e-04
Loss = 1.2326e-03, PNorm = 172.3446, GNorm = 0.1005, lr_0 = 1.5251e-04
Loss = 1.2532e-03, PNorm = 172.3481, GNorm = 0.1077, lr_0 = 1.5241e-04
Loss = 1.2486e-03, PNorm = 172.3504, GNorm = 0.0622, lr_0 = 1.5230e-04
Loss = 1.9878e-03, PNorm = 172.3533, GNorm = 0.1034, lr_0 = 1.5220e-04
Loss = 1.3941e-03, PNorm = 172.3563, GNorm = 0.0897, lr_0 = 1.5209e-04
Loss = 2.2020e-03, PNorm = 172.3591, GNorm = 0.0979, lr_0 = 1.5199e-04
Loss = 2.5951e-03, PNorm = 172.3612, GNorm = 0.3231, lr_0 = 1.5189e-04
Loss = 2.0613e-03, PNorm = 172.3646, GNorm = 0.1788, lr_0 = 1.5178e-04
Loss = 1.6503e-03, PNorm = 172.3674, GNorm = 0.1322, lr_0 = 1.5168e-04
Loss = 2.7493e-03, PNorm = 172.3712, GNorm = 0.0307, lr_0 = 1.5157e-04
Loss = 2.6682e-03, PNorm = 172.3745, GNorm = 0.0816, lr_0 = 1.5147e-04
Loss = 1.5742e-03, PNorm = 172.3777, GNorm = 0.1243, lr_0 = 1.5137e-04
Loss = 1.8791e-03, PNorm = 172.3803, GNorm = 0.0932, lr_0 = 1.5126e-04
Loss = 4.5928e-03, PNorm = 172.3814, GNorm = 0.1757, lr_0 = 1.5116e-04
Loss = 1.8335e-03, PNorm = 172.3832, GNorm = 0.0696, lr_0 = 1.5106e-04
Loss = 2.9461e-03, PNorm = 172.3843, GNorm = 0.0939, lr_0 = 1.5095e-04
Loss = 2.9855e-03, PNorm = 172.3902, GNorm = 0.1244, lr_0 = 1.5085e-04
Validation mae = 0.277885
Epoch 25
Loss = 1.1416e-03, PNorm = 172.3933, GNorm = 0.2078, lr_0 = 1.5075e-04
Loss = 1.5529e-03, PNorm = 172.3957, GNorm = 0.1042, lr_0 = 1.5064e-04
Loss = 2.3564e-03, PNorm = 172.3953, GNorm = 0.0806, lr_0 = 1.5054e-04
Loss = 1.1067e-03, PNorm = 172.3966, GNorm = 0.1132, lr_0 = 1.5044e-04
Loss = 2.1477e-03, PNorm = 172.3977, GNorm = 0.0597, lr_0 = 1.5033e-04
Loss = 2.4655e-03, PNorm = 172.4004, GNorm = 0.1790, lr_0 = 1.5023e-04
Loss = 1.7959e-03, PNorm = 172.4028, GNorm = 0.0635, lr_0 = 1.5013e-04
Loss = 1.2053e-03, PNorm = 172.4048, GNorm = 0.3289, lr_0 = 1.5002e-04
Loss = 9.1689e-04, PNorm = 172.4066, GNorm = 0.0999, lr_0 = 1.4992e-04
Loss = 1.7463e-03, PNorm = 172.4082, GNorm = 0.0474, lr_0 = 1.4982e-04
Loss = 1.8655e-03, PNorm = 172.4089, GNorm = 0.0405, lr_0 = 1.4972e-04
Loss = 1.6808e-03, PNorm = 172.4120, GNorm = 0.0961, lr_0 = 1.4961e-04
Loss = 1.2308e-03, PNorm = 172.4144, GNorm = 0.2660, lr_0 = 1.4951e-04
Loss = 1.4347e-03, PNorm = 172.4171, GNorm = 0.0849, lr_0 = 1.4941e-04
Loss = 1.1608e-03, PNorm = 172.4185, GNorm = 0.0384, lr_0 = 1.4931e-04
Loss = 1.1564e-03, PNorm = 172.4203, GNorm = 0.2152, lr_0 = 1.4920e-04
Loss = 2.1174e-03, PNorm = 172.4215, GNorm = 0.0876, lr_0 = 1.4910e-04
Loss = 2.2817e-03, PNorm = 172.4228, GNorm = 0.1276, lr_0 = 1.4900e-04
Loss = 1.0551e-03, PNorm = 172.4250, GNorm = 0.0621, lr_0 = 1.4890e-04
Loss = 9.0181e-04, PNorm = 172.4279, GNorm = 0.1634, lr_0 = 1.4880e-04
Loss = 2.6225e-03, PNorm = 172.4301, GNorm = 0.1555, lr_0 = 1.4869e-04
Loss = 1.3758e-03, PNorm = 172.4345, GNorm = 0.1731, lr_0 = 1.4859e-04
Loss = 8.5530e-04, PNorm = 172.4380, GNorm = 0.1695, lr_0 = 1.4849e-04
Loss = 2.3960e-03, PNorm = 172.4406, GNorm = 0.1224, lr_0 = 1.4839e-04
Loss = 1.4889e-03, PNorm = 172.4415, GNorm = 0.0297, lr_0 = 1.4829e-04
Loss = 3.0233e-03, PNorm = 172.4427, GNorm = 0.0456, lr_0 = 1.4818e-04
Loss = 1.4808e-03, PNorm = 172.4453, GNorm = 0.1370, lr_0 = 1.4808e-04
Loss = 1.5348e-03, PNorm = 172.4460, GNorm = 0.0620, lr_0 = 1.4798e-04
Loss = 1.4020e-03, PNorm = 172.4475, GNorm = 0.0500, lr_0 = 1.4788e-04
Loss = 2.9398e-03, PNorm = 172.4495, GNorm = 0.0714, lr_0 = 1.4778e-04
Loss = 1.8877e-03, PNorm = 172.4519, GNorm = 0.1692, lr_0 = 1.4768e-04
Loss = 1.6297e-03, PNorm = 172.4529, GNorm = 0.1405, lr_0 = 1.4758e-04
Loss = 1.0995e-03, PNorm = 172.4549, GNorm = 0.0884, lr_0 = 1.4748e-04
Loss = 1.4804e-03, PNorm = 172.4569, GNorm = 0.1334, lr_0 = 1.4737e-04
Loss = 1.6257e-03, PNorm = 172.4597, GNorm = 0.0944, lr_0 = 1.4727e-04
Loss = 1.9134e-03, PNorm = 172.4619, GNorm = 0.2329, lr_0 = 1.4717e-04
Loss = 1.2410e-03, PNorm = 172.4645, GNorm = 0.0826, lr_0 = 1.4707e-04
Loss = 2.0178e-03, PNorm = 172.4666, GNorm = 0.0951, lr_0 = 1.4697e-04
Loss = 2.0844e-03, PNorm = 172.4685, GNorm = 0.1117, lr_0 = 1.4687e-04
Loss = 1.2462e-03, PNorm = 172.4705, GNorm = 0.1650, lr_0 = 1.4677e-04
Loss = 1.7912e-03, PNorm = 172.4724, GNorm = 0.0912, lr_0 = 1.4667e-04
Loss = 1.2646e-03, PNorm = 172.4738, GNorm = 0.0954, lr_0 = 1.4657e-04
Loss = 1.5699e-03, PNorm = 172.4756, GNorm = 0.0674, lr_0 = 1.4647e-04
Loss = 1.1466e-03, PNorm = 172.4785, GNorm = 0.0775, lr_0 = 1.4637e-04
Loss = 1.1430e-03, PNorm = 172.4812, GNorm = 0.1930, lr_0 = 1.4627e-04
Loss = 3.3335e-03, PNorm = 172.4824, GNorm = 0.0979, lr_0 = 1.4617e-04
Loss = 1.9857e-03, PNorm = 172.4838, GNorm = 0.2521, lr_0 = 1.4607e-04
Loss = 1.4093e-03, PNorm = 172.4859, GNorm = 0.0475, lr_0 = 1.4597e-04
Loss = 8.8314e-04, PNorm = 172.4886, GNorm = 0.0844, lr_0 = 1.4587e-04
Loss = 1.2978e-03, PNorm = 172.4905, GNorm = 0.0861, lr_0 = 1.4577e-04
Loss = 2.0342e-03, PNorm = 172.4900, GNorm = 0.0618, lr_0 = 1.4567e-04
Loss = 2.5517e-03, PNorm = 172.4918, GNorm = 0.1435, lr_0 = 1.4557e-04
Loss = 1.8410e-03, PNorm = 172.4953, GNorm = 0.1512, lr_0 = 1.4547e-04
Loss = 2.9740e-03, PNorm = 172.4991, GNorm = 0.0573, lr_0 = 1.4537e-04
Loss = 8.8822e-04, PNorm = 172.5029, GNorm = 0.0271, lr_0 = 1.4527e-04
Loss = 1.1000e-03, PNorm = 172.5059, GNorm = 0.0774, lr_0 = 1.4517e-04
Loss = 1.6812e-03, PNorm = 172.5077, GNorm = 0.0680, lr_0 = 1.4507e-04
Loss = 1.7631e-03, PNorm = 172.5104, GNorm = 0.0754, lr_0 = 1.4497e-04
Loss = 2.5991e-03, PNorm = 172.5140, GNorm = 0.1232, lr_0 = 1.4487e-04
Loss = 1.1778e-03, PNorm = 172.5171, GNorm = 0.1411, lr_0 = 1.4477e-04
Loss = 1.0811e-03, PNorm = 172.5189, GNorm = 0.0274, lr_0 = 1.4467e-04
Loss = 9.8231e-04, PNorm = 172.5200, GNorm = 0.1269, lr_0 = 1.4457e-04
Loss = 1.0164e-03, PNorm = 172.5218, GNorm = 0.0473, lr_0 = 1.4447e-04
Loss = 1.7621e-03, PNorm = 172.5238, GNorm = 0.0893, lr_0 = 1.4438e-04
Loss = 1.9060e-03, PNorm = 172.5252, GNorm = 0.2103, lr_0 = 1.4428e-04
Loss = 2.2082e-03, PNorm = 172.5271, GNorm = 0.0473, lr_0 = 1.4418e-04
Loss = 1.0670e-03, PNorm = 172.5292, GNorm = 0.0712, lr_0 = 1.4408e-04
Loss = 1.3704e-03, PNorm = 172.5305, GNorm = 0.0645, lr_0 = 1.4398e-04
Loss = 1.2616e-03, PNorm = 172.5318, GNorm = 0.1478, lr_0 = 1.4388e-04
Loss = 1.1479e-03, PNorm = 172.5330, GNorm = 0.0999, lr_0 = 1.4378e-04
Loss = 2.2750e-03, PNorm = 172.5340, GNorm = 0.1164, lr_0 = 1.4368e-04
Loss = 1.3341e-03, PNorm = 172.5359, GNorm = 0.1140, lr_0 = 1.4359e-04
Loss = 2.3175e-03, PNorm = 172.5381, GNorm = 0.0919, lr_0 = 1.4349e-04
Loss = 1.0768e-03, PNorm = 172.5408, GNorm = 0.2851, lr_0 = 1.4339e-04
Loss = 4.2178e-03, PNorm = 172.5414, GNorm = 0.1313, lr_0 = 1.4329e-04
Loss = 1.2261e-03, PNorm = 172.5429, GNorm = 0.1269, lr_0 = 1.4319e-04
Loss = 1.1905e-03, PNorm = 172.5460, GNorm = 0.1386, lr_0 = 1.4310e-04
Loss = 6.4892e-03, PNorm = 172.5487, GNorm = 0.0751, lr_0 = 1.4300e-04
Loss = 1.2106e-03, PNorm = 172.5517, GNorm = 0.0982, lr_0 = 1.4290e-04
Loss = 1.2262e-03, PNorm = 172.5538, GNorm = 0.0834, lr_0 = 1.4280e-04
Loss = 1.0178e-03, PNorm = 172.5560, GNorm = 0.1718, lr_0 = 1.4270e-04
Loss = 1.1577e-03, PNorm = 172.5581, GNorm = 0.0653, lr_0 = 1.4261e-04
Loss = 1.4540e-03, PNorm = 172.5612, GNorm = 0.0718, lr_0 = 1.4251e-04
Loss = 2.2750e-03, PNorm = 172.5652, GNorm = 0.0610, lr_0 = 1.4241e-04
Loss = 1.5969e-03, PNorm = 172.5691, GNorm = 0.2075, lr_0 = 1.4231e-04
Loss = 1.2561e-03, PNorm = 172.5717, GNorm = 0.1384, lr_0 = 1.4222e-04
Loss = 4.4654e-03, PNorm = 172.5762, GNorm = 0.2375, lr_0 = 1.4212e-04
Loss = 2.0880e-03, PNorm = 172.5800, GNorm = 0.1383, lr_0 = 1.4202e-04
Loss = 1.2637e-03, PNorm = 172.5828, GNorm = 0.1303, lr_0 = 1.4192e-04
Loss = 1.2273e-03, PNorm = 172.5866, GNorm = 0.1785, lr_0 = 1.4183e-04
Loss = 1.5075e-03, PNorm = 172.5871, GNorm = 0.0558, lr_0 = 1.4173e-04
Loss = 2.4236e-03, PNorm = 172.5877, GNorm = 0.2116, lr_0 = 1.4163e-04
Loss = 1.0827e-03, PNorm = 172.5878, GNorm = 0.0581, lr_0 = 1.4153e-04
Loss = 2.9433e-03, PNorm = 172.5893, GNorm = 0.1232, lr_0 = 1.4144e-04
Loss = 2.0976e-03, PNorm = 172.5924, GNorm = 0.0689, lr_0 = 1.4134e-04
Loss = 2.9510e-03, PNorm = 172.5959, GNorm = 0.1293, lr_0 = 1.4124e-04
Loss = 3.3471e-03, PNorm = 172.6001, GNorm = 0.1476, lr_0 = 1.4115e-04
Loss = 2.3354e-03, PNorm = 172.6031, GNorm = 0.1474, lr_0 = 1.4105e-04
Loss = 2.2287e-03, PNorm = 172.6054, GNorm = 0.1729, lr_0 = 1.4095e-04
Loss = 1.3262e-03, PNorm = 172.6089, GNorm = 0.0922, lr_0 = 1.4086e-04
Loss = 1.6699e-03, PNorm = 172.6114, GNorm = 0.2151, lr_0 = 1.4076e-04
Loss = 1.5189e-03, PNorm = 172.6145, GNorm = 0.0745, lr_0 = 1.4066e-04
Loss = 2.3929e-03, PNorm = 172.6182, GNorm = 0.0612, lr_0 = 1.4057e-04
Loss = 1.8675e-03, PNorm = 172.6227, GNorm = 0.3072, lr_0 = 1.4047e-04
Loss = 1.3305e-03, PNorm = 172.6251, GNorm = 0.0347, lr_0 = 1.4038e-04
Loss = 1.3658e-03, PNorm = 172.6262, GNorm = 0.3189, lr_0 = 1.4028e-04
Loss = 9.7269e-04, PNorm = 172.6265, GNorm = 0.1011, lr_0 = 1.4018e-04
Loss = 1.3676e-03, PNorm = 172.6284, GNorm = 0.0476, lr_0 = 1.4009e-04
Loss = 1.1785e-03, PNorm = 172.6323, GNorm = 0.0951, lr_0 = 1.3999e-04
Loss = 1.4931e-03, PNorm = 172.6359, GNorm = 0.1018, lr_0 = 1.3990e-04
Loss = 9.2968e-04, PNorm = 172.6375, GNorm = 0.1252, lr_0 = 1.3980e-04
Loss = 1.1056e-03, PNorm = 172.6396, GNorm = 0.1394, lr_0 = 1.3970e-04
Loss = 2.4102e-03, PNorm = 172.6394, GNorm = 0.0861, lr_0 = 1.3961e-04
Loss = 2.7887e-03, PNorm = 172.6412, GNorm = 0.1408, lr_0 = 1.3951e-04
Loss = 1.1450e-03, PNorm = 172.6438, GNorm = 0.0859, lr_0 = 1.3942e-04
Loss = 1.3876e-03, PNorm = 172.6449, GNorm = 0.1083, lr_0 = 1.3932e-04
Loss = 1.1735e-03, PNorm = 172.6461, GNorm = 0.0452, lr_0 = 1.3923e-04
Loss = 2.1616e-03, PNorm = 172.6478, GNorm = 0.0626, lr_0 = 1.3913e-04
Loss = 9.2068e-04, PNorm = 172.6500, GNorm = 0.0475, lr_0 = 1.3904e-04
Loss = 1.5027e-03, PNorm = 172.6523, GNorm = 0.0331, lr_0 = 1.3894e-04
Validation mae = 0.278049
Epoch 26
Loss = 1.8296e-03, PNorm = 172.6539, GNorm = 0.1659, lr_0 = 1.3884e-04
Loss = 2.3143e-03, PNorm = 172.6546, GNorm = 0.0753, lr_0 = 1.3875e-04
Loss = 1.5649e-03, PNorm = 172.6563, GNorm = 0.0898, lr_0 = 1.3865e-04
Loss = 2.5797e-03, PNorm = 172.6590, GNorm = 0.0801, lr_0 = 1.3856e-04
Loss = 1.2949e-03, PNorm = 172.6618, GNorm = 0.1223, lr_0 = 1.3846e-04
Loss = 1.3488e-03, PNorm = 172.6644, GNorm = 0.1903, lr_0 = 1.3837e-04
Loss = 1.3461e-03, PNorm = 172.6650, GNorm = 0.0585, lr_0 = 1.3828e-04
Loss = 1.0615e-03, PNorm = 172.6660, GNorm = 0.1579, lr_0 = 1.3818e-04
Loss = 1.1669e-03, PNorm = 172.6679, GNorm = 0.0495, lr_0 = 1.3809e-04
Loss = 8.0583e-04, PNorm = 172.6711, GNorm = 0.0352, lr_0 = 1.3799e-04
Loss = 1.4175e-03, PNorm = 172.6727, GNorm = 0.0980, lr_0 = 1.3790e-04
Loss = 9.3238e-04, PNorm = 172.6737, GNorm = 0.1052, lr_0 = 1.3780e-04
Loss = 1.2554e-03, PNorm = 172.6739, GNorm = 0.0936, lr_0 = 1.3771e-04
Loss = 2.6511e-03, PNorm = 172.6748, GNorm = 0.0847, lr_0 = 1.3761e-04
Loss = 1.6985e-03, PNorm = 172.6749, GNorm = 0.0781, lr_0 = 1.3752e-04
Loss = 1.2475e-03, PNorm = 172.6772, GNorm = 0.0532, lr_0 = 1.3742e-04
Loss = 7.6724e-04, PNorm = 172.6792, GNorm = 0.0910, lr_0 = 1.3733e-04
Loss = 1.9823e-03, PNorm = 172.6811, GNorm = 0.0940, lr_0 = 1.3724e-04
Loss = 7.3193e-04, PNorm = 172.6829, GNorm = 0.1053, lr_0 = 1.3714e-04
Loss = 8.6100e-04, PNorm = 172.6841, GNorm = 0.0532, lr_0 = 1.3705e-04
Loss = 1.5411e-03, PNorm = 172.6858, GNorm = 0.2206, lr_0 = 1.3695e-04
Loss = 1.0702e-03, PNorm = 172.6882, GNorm = 0.1142, lr_0 = 1.3686e-04
Loss = 1.0171e-03, PNorm = 172.6900, GNorm = 0.1266, lr_0 = 1.3677e-04
Loss = 1.7286e-03, PNorm = 172.6924, GNorm = 0.1134, lr_0 = 1.3667e-04
Loss = 9.9778e-04, PNorm = 172.6941, GNorm = 0.1447, lr_0 = 1.3658e-04
Loss = 3.7021e-03, PNorm = 172.6942, GNorm = 0.0284, lr_0 = 1.3649e-04
Loss = 1.8350e-03, PNorm = 172.6951, GNorm = 0.0438, lr_0 = 1.3639e-04
Loss = 1.1909e-03, PNorm = 172.6952, GNorm = 0.2171, lr_0 = 1.3630e-04
Loss = 3.8581e-03, PNorm = 172.6966, GNorm = 0.0331, lr_0 = 1.3621e-04
Loss = 8.3895e-04, PNorm = 172.6984, GNorm = 0.0593, lr_0 = 1.3611e-04
Loss = 1.1286e-03, PNorm = 172.6993, GNorm = 0.0235, lr_0 = 1.3602e-04
Loss = 7.4341e-04, PNorm = 172.7008, GNorm = 0.0833, lr_0 = 1.3593e-04
Loss = 1.3772e-03, PNorm = 172.7012, GNorm = 0.1187, lr_0 = 1.3583e-04
Loss = 8.8823e-04, PNorm = 172.7037, GNorm = 0.0593, lr_0 = 1.3574e-04
Loss = 1.5356e-03, PNorm = 172.7060, GNorm = 0.1082, lr_0 = 1.3565e-04
Loss = 2.5019e-03, PNorm = 172.7080, GNorm = 0.1577, lr_0 = 1.3555e-04
Loss = 1.5839e-03, PNorm = 172.7100, GNorm = 0.0853, lr_0 = 1.3546e-04
Loss = 1.1806e-03, PNorm = 172.7125, GNorm = 0.1032, lr_0 = 1.3537e-04
Loss = 1.0102e-03, PNorm = 172.7153, GNorm = 0.1141, lr_0 = 1.3528e-04
Loss = 1.3337e-03, PNorm = 172.7193, GNorm = 0.0593, lr_0 = 1.3518e-04
Loss = 2.4091e-03, PNorm = 172.7235, GNorm = 0.1487, lr_0 = 1.3509e-04
Loss = 1.4585e-03, PNorm = 172.7252, GNorm = 0.0549, lr_0 = 1.3500e-04
Loss = 2.1863e-03, PNorm = 172.7262, GNorm = 0.1139, lr_0 = 1.3491e-04
Loss = 2.6272e-03, PNorm = 172.7267, GNorm = 0.1688, lr_0 = 1.3481e-04
Loss = 2.7944e-03, PNorm = 172.7268, GNorm = 0.0727, lr_0 = 1.3472e-04
Loss = 1.3974e-03, PNorm = 172.7290, GNorm = 0.0940, lr_0 = 1.3463e-04
Loss = 1.0164e-03, PNorm = 172.7318, GNorm = 0.0568, lr_0 = 1.3454e-04
Loss = 9.3892e-04, PNorm = 172.7338, GNorm = 0.0720, lr_0 = 1.3444e-04
Loss = 2.1163e-03, PNorm = 172.7366, GNorm = 0.0385, lr_0 = 1.3435e-04
Loss = 1.6335e-03, PNorm = 172.7387, GNorm = 0.2947, lr_0 = 1.3426e-04
Loss = 9.7222e-04, PNorm = 172.7406, GNorm = 0.0600, lr_0 = 1.3417e-04
Loss = 1.7274e-03, PNorm = 172.7408, GNorm = 0.0590, lr_0 = 1.3408e-04
Loss = 1.3250e-03, PNorm = 172.7434, GNorm = 0.1413, lr_0 = 1.3398e-04
Loss = 1.6494e-03, PNorm = 172.7465, GNorm = 0.2126, lr_0 = 1.3389e-04
Loss = 9.2477e-04, PNorm = 172.7488, GNorm = 0.0597, lr_0 = 1.3380e-04
Loss = 1.0707e-03, PNorm = 172.7507, GNorm = 0.1240, lr_0 = 1.3371e-04
Loss = 8.5569e-04, PNorm = 172.7534, GNorm = 0.0680, lr_0 = 1.3362e-04
Loss = 1.7572e-03, PNorm = 172.7542, GNorm = 0.1064, lr_0 = 1.3353e-04
Loss = 1.1574e-03, PNorm = 172.7563, GNorm = 0.0569, lr_0 = 1.3343e-04
Loss = 8.0939e-04, PNorm = 172.7584, GNorm = 0.0618, lr_0 = 1.3334e-04
Loss = 1.5280e-03, PNorm = 172.7630, GNorm = 0.1717, lr_0 = 1.3325e-04
Loss = 1.8783e-03, PNorm = 172.7662, GNorm = 0.0349, lr_0 = 1.3316e-04
Loss = 2.2139e-03, PNorm = 172.7677, GNorm = 0.2121, lr_0 = 1.3307e-04
Loss = 2.6423e-03, PNorm = 172.7694, GNorm = 0.1320, lr_0 = 1.3298e-04
Loss = 1.3344e-03, PNorm = 172.7713, GNorm = 0.1483, lr_0 = 1.3289e-04
Loss = 1.6467e-03, PNorm = 172.7735, GNorm = 0.2413, lr_0 = 1.3280e-04
Loss = 6.9784e-04, PNorm = 172.7762, GNorm = 0.0567, lr_0 = 1.3270e-04
Loss = 3.4131e-03, PNorm = 172.7765, GNorm = 0.0334, lr_0 = 1.3261e-04
Loss = 7.2025e-04, PNorm = 172.7764, GNorm = 0.0667, lr_0 = 1.3252e-04
Loss = 1.8207e-03, PNorm = 172.7781, GNorm = 0.0802, lr_0 = 1.3243e-04
Loss = 1.0274e-03, PNorm = 172.7814, GNorm = 0.0899, lr_0 = 1.3234e-04
Loss = 1.4530e-03, PNorm = 172.7837, GNorm = 0.0650, lr_0 = 1.3225e-04
Loss = 1.6164e-03, PNorm = 172.7851, GNorm = 0.0926, lr_0 = 1.3216e-04
Loss = 8.3476e-04, PNorm = 172.7864, GNorm = 0.2045, lr_0 = 1.3207e-04
Loss = 8.5588e-04, PNorm = 172.7892, GNorm = 0.0868, lr_0 = 1.3198e-04
Loss = 9.9339e-04, PNorm = 172.7915, GNorm = 0.1282, lr_0 = 1.3189e-04
Loss = 1.0543e-03, PNorm = 172.7934, GNorm = 0.0908, lr_0 = 1.3180e-04
Loss = 9.8572e-04, PNorm = 172.7951, GNorm = 0.0966, lr_0 = 1.3171e-04
Loss = 1.5167e-03, PNorm = 172.7967, GNorm = 0.0675, lr_0 = 1.3162e-04
Loss = 8.3252e-04, PNorm = 172.7992, GNorm = 0.0787, lr_0 = 1.3153e-04
Loss = 2.3199e-03, PNorm = 172.8005, GNorm = 0.1081, lr_0 = 1.3144e-04
Loss = 1.7134e-03, PNorm = 172.8015, GNorm = 0.0295, lr_0 = 1.3135e-04
Loss = 1.8763e-03, PNorm = 172.8026, GNorm = 0.1638, lr_0 = 1.3126e-04
Loss = 1.1784e-03, PNorm = 172.8039, GNorm = 0.1241, lr_0 = 1.3117e-04
Loss = 7.5407e-04, PNorm = 172.8047, GNorm = 0.0922, lr_0 = 1.3108e-04
Loss = 8.0096e-04, PNorm = 172.8057, GNorm = 0.0851, lr_0 = 1.3099e-04
Loss = 3.2427e-03, PNorm = 172.8069, GNorm = 0.2052, lr_0 = 1.3090e-04
Loss = 1.8105e-03, PNorm = 172.8087, GNorm = 0.2200, lr_0 = 1.3081e-04
Loss = 1.6348e-03, PNorm = 172.8117, GNorm = 0.0396, lr_0 = 1.3072e-04
Loss = 1.2048e-03, PNorm = 172.8133, GNorm = 0.0449, lr_0 = 1.3063e-04
Loss = 1.3906e-03, PNorm = 172.8146, GNorm = 0.0981, lr_0 = 1.3054e-04
Loss = 2.3373e-03, PNorm = 172.8161, GNorm = 0.0879, lr_0 = 1.3045e-04
Loss = 1.1298e-03, PNorm = 172.8170, GNorm = 0.0661, lr_0 = 1.3036e-04
Loss = 1.1822e-03, PNorm = 172.8189, GNorm = 0.1880, lr_0 = 1.3027e-04
Loss = 1.1483e-03, PNorm = 172.8204, GNorm = 0.2712, lr_0 = 1.3018e-04
Loss = 1.5872e-03, PNorm = 172.8224, GNorm = 0.0492, lr_0 = 1.3009e-04
Loss = 1.5420e-03, PNorm = 172.8258, GNorm = 0.2339, lr_0 = 1.3000e-04
Loss = 1.8189e-03, PNorm = 172.8276, GNorm = 0.1151, lr_0 = 1.2992e-04
Loss = 1.0812e-03, PNorm = 172.8305, GNorm = 0.2006, lr_0 = 1.2983e-04
Loss = 2.0899e-03, PNorm = 172.8314, GNorm = 0.2327, lr_0 = 1.2974e-04
Loss = 9.9614e-04, PNorm = 172.8327, GNorm = 0.1096, lr_0 = 1.2965e-04
Loss = 8.2013e-04, PNorm = 172.8354, GNorm = 0.1033, lr_0 = 1.2956e-04
Loss = 1.4740e-03, PNorm = 172.8371, GNorm = 0.0900, lr_0 = 1.2947e-04
Loss = 1.2540e-03, PNorm = 172.8389, GNorm = 0.1990, lr_0 = 1.2938e-04
Loss = 2.9882e-03, PNorm = 172.8398, GNorm = 0.3283, lr_0 = 1.2929e-04
Loss = 2.8451e-03, PNorm = 172.8405, GNorm = 0.0876, lr_0 = 1.2921e-04
Loss = 3.1618e-03, PNorm = 172.8412, GNorm = 0.4161, lr_0 = 1.2912e-04
Loss = 1.3166e-03, PNorm = 172.8424, GNorm = 0.0373, lr_0 = 1.2903e-04
Loss = 2.5827e-03, PNorm = 172.8432, GNorm = 0.0894, lr_0 = 1.2894e-04
Loss = 2.8640e-03, PNorm = 172.8439, GNorm = 0.2293, lr_0 = 1.2885e-04
Loss = 2.0991e-03, PNorm = 172.8470, GNorm = 0.0844, lr_0 = 1.2876e-04
Loss = 8.7008e-04, PNorm = 172.8481, GNorm = 0.0699, lr_0 = 1.2867e-04
Loss = 1.7397e-03, PNorm = 172.8507, GNorm = 0.1296, lr_0 = 1.2859e-04
Loss = 1.1257e-03, PNorm = 172.8533, GNorm = 0.1423, lr_0 = 1.2850e-04
Loss = 2.0197e-03, PNorm = 172.8569, GNorm = 0.0490, lr_0 = 1.2841e-04
Loss = 2.1698e-03, PNorm = 172.8594, GNorm = 0.1039, lr_0 = 1.2832e-04
Loss = 1.3135e-03, PNorm = 172.8610, GNorm = 0.0582, lr_0 = 1.2823e-04
Loss = 1.1473e-03, PNorm = 172.8632, GNorm = 0.0898, lr_0 = 1.2815e-04
Loss = 2.1436e-03, PNorm = 172.8655, GNorm = 0.0890, lr_0 = 1.2806e-04
Loss = 8.7995e-04, PNorm = 172.8673, GNorm = 0.1678, lr_0 = 1.2797e-04
Validation mae = 0.278001
Epoch 27
Loss = 1.7739e-03, PNorm = 172.8682, GNorm = 0.1059, lr_0 = 1.2788e-04
Loss = 1.5605e-03, PNorm = 172.8706, GNorm = 0.1514, lr_0 = 1.2780e-04
Loss = 1.2757e-03, PNorm = 172.8713, GNorm = 0.1513, lr_0 = 1.2771e-04
Loss = 8.5248e-04, PNorm = 172.8725, GNorm = 0.1782, lr_0 = 1.2762e-04
Loss = 7.3042e-04, PNorm = 172.8739, GNorm = 0.0347, lr_0 = 1.2753e-04
Loss = 1.4229e-03, PNorm = 172.8763, GNorm = 0.1075, lr_0 = 1.2745e-04
Loss = 8.2986e-04, PNorm = 172.8793, GNorm = 0.0661, lr_0 = 1.2736e-04
Loss = 1.2146e-03, PNorm = 172.8805, GNorm = 0.0622, lr_0 = 1.2727e-04
Loss = 1.5935e-03, PNorm = 172.8802, GNorm = 0.0422, lr_0 = 1.2718e-04
Loss = 2.0216e-03, PNorm = 172.8819, GNorm = 0.2800, lr_0 = 1.2710e-04
Loss = 7.6659e-04, PNorm = 172.8832, GNorm = 0.0860, lr_0 = 1.2701e-04
Loss = 1.1521e-03, PNorm = 172.8848, GNorm = 0.0940, lr_0 = 1.2692e-04
Loss = 2.1381e-03, PNorm = 172.8877, GNorm = 0.0595, lr_0 = 1.2684e-04
Loss = 7.3498e-04, PNorm = 172.8886, GNorm = 0.0798, lr_0 = 1.2675e-04
Loss = 1.2138e-03, PNorm = 172.8891, GNorm = 0.1188, lr_0 = 1.2666e-04
Loss = 9.7488e-04, PNorm = 172.8904, GNorm = 0.1394, lr_0 = 1.2658e-04
Loss = 9.1313e-04, PNorm = 172.8918, GNorm = 0.0472, lr_0 = 1.2649e-04
Loss = 9.4186e-04, PNorm = 172.8932, GNorm = 0.0767, lr_0 = 1.2640e-04
Loss = 1.4877e-03, PNorm = 172.8939, GNorm = 0.0435, lr_0 = 1.2632e-04
Loss = 1.0225e-03, PNorm = 172.8961, GNorm = 0.0620, lr_0 = 1.2623e-04
Loss = 1.0190e-03, PNorm = 172.8977, GNorm = 0.1796, lr_0 = 1.2614e-04
Loss = 8.0115e-04, PNorm = 172.8995, GNorm = 0.0542, lr_0 = 1.2606e-04
Loss = 3.3902e-03, PNorm = 172.9013, GNorm = 0.1613, lr_0 = 1.2597e-04
Loss = 7.4815e-04, PNorm = 172.9034, GNorm = 0.0796, lr_0 = 1.2588e-04
Loss = 1.3832e-03, PNorm = 172.9053, GNorm = 0.0927, lr_0 = 1.2580e-04
Loss = 9.0666e-04, PNorm = 172.9080, GNorm = 0.1015, lr_0 = 1.2571e-04
Loss = 3.4488e-03, PNorm = 172.9103, GNorm = 0.3552, lr_0 = 1.2563e-04
Loss = 1.8281e-03, PNorm = 172.9122, GNorm = 0.1074, lr_0 = 1.2554e-04
Loss = 7.1236e-04, PNorm = 172.9137, GNorm = 0.1197, lr_0 = 1.2545e-04
Loss = 8.8103e-04, PNorm = 172.9154, GNorm = 0.0580, lr_0 = 1.2537e-04
Loss = 1.1101e-03, PNorm = 172.9157, GNorm = 0.1189, lr_0 = 1.2528e-04
Loss = 1.9251e-03, PNorm = 172.9159, GNorm = 0.2038, lr_0 = 1.2520e-04
Loss = 1.0882e-03, PNorm = 172.9162, GNorm = 0.1103, lr_0 = 1.2511e-04
Loss = 1.3342e-03, PNorm = 172.9150, GNorm = 0.1093, lr_0 = 1.2502e-04
Loss = 8.9167e-04, PNorm = 172.9152, GNorm = 0.1489, lr_0 = 1.2494e-04
Loss = 1.8069e-03, PNorm = 172.9162, GNorm = 0.1524, lr_0 = 1.2485e-04
Loss = 8.2314e-04, PNorm = 172.9180, GNorm = 0.1368, lr_0 = 1.2477e-04
Loss = 3.4463e-03, PNorm = 172.9196, GNorm = 0.1073, lr_0 = 1.2468e-04
Loss = 1.2537e-03, PNorm = 172.9227, GNorm = 0.0984, lr_0 = 1.2460e-04
Loss = 7.4280e-04, PNorm = 172.9256, GNorm = 0.1304, lr_0 = 1.2451e-04
Loss = 1.1947e-03, PNorm = 172.9277, GNorm = 0.0484, lr_0 = 1.2443e-04
Loss = 1.0437e-03, PNorm = 172.9288, GNorm = 0.0885, lr_0 = 1.2434e-04
Loss = 1.8423e-03, PNorm = 172.9274, GNorm = 0.1285, lr_0 = 1.2426e-04
Loss = 8.3252e-04, PNorm = 172.9277, GNorm = 0.0535, lr_0 = 1.2417e-04
Loss = 1.6811e-03, PNorm = 172.9293, GNorm = 0.1244, lr_0 = 1.2409e-04
Loss = 6.4796e-04, PNorm = 172.9323, GNorm = 0.0593, lr_0 = 1.2400e-04
Loss = 3.1464e-03, PNorm = 172.9350, GNorm = 0.2743, lr_0 = 1.2392e-04
Loss = 1.0380e-03, PNorm = 172.9371, GNorm = 0.0509, lr_0 = 1.2383e-04
Loss = 8.4213e-04, PNorm = 172.9382, GNorm = 0.1497, lr_0 = 1.2375e-04
Loss = 2.4678e-03, PNorm = 172.9376, GNorm = 0.1200, lr_0 = 1.2366e-04
Loss = 1.1062e-03, PNorm = 172.9373, GNorm = 0.0402, lr_0 = 1.2358e-04
Loss = 1.3228e-03, PNorm = 172.9388, GNorm = 0.0895, lr_0 = 1.2349e-04
Loss = 1.3274e-03, PNorm = 172.9424, GNorm = 0.1274, lr_0 = 1.2341e-04
Loss = 1.6400e-03, PNorm = 172.9453, GNorm = 0.1409, lr_0 = 1.2332e-04
Loss = 1.6408e-03, PNorm = 172.9478, GNorm = 0.0321, lr_0 = 1.2324e-04
Loss = 2.0853e-03, PNorm = 172.9503, GNorm = 0.0328, lr_0 = 1.2315e-04
Loss = 1.0647e-03, PNorm = 172.9527, GNorm = 0.0614, lr_0 = 1.2307e-04
Loss = 3.6386e-03, PNorm = 172.9555, GNorm = 0.0742, lr_0 = 1.2298e-04
Loss = 1.7583e-03, PNorm = 172.9595, GNorm = 0.1162, lr_0 = 1.2290e-04
Loss = 1.2772e-03, PNorm = 172.9618, GNorm = 0.1487, lr_0 = 1.2282e-04
Loss = 1.2537e-03, PNorm = 172.9633, GNorm = 0.1323, lr_0 = 1.2273e-04
Loss = 2.2771e-03, PNorm = 172.9630, GNorm = 0.1325, lr_0 = 1.2265e-04
Loss = 6.4825e-04, PNorm = 172.9645, GNorm = 0.0547, lr_0 = 1.2256e-04
Loss = 1.8230e-03, PNorm = 172.9665, GNorm = 0.0527, lr_0 = 1.2248e-04
Loss = 2.5764e-03, PNorm = 172.9684, GNorm = 0.1219, lr_0 = 1.2240e-04
Loss = 1.2611e-03, PNorm = 172.9702, GNorm = 0.0512, lr_0 = 1.2231e-04
Loss = 1.6979e-03, PNorm = 172.9713, GNorm = 0.1086, lr_0 = 1.2223e-04
Loss = 1.6110e-03, PNorm = 172.9727, GNorm = 0.2275, lr_0 = 1.2214e-04
Loss = 2.6107e-03, PNorm = 172.9716, GNorm = 0.1200, lr_0 = 1.2206e-04
Loss = 1.2291e-03, PNorm = 172.9725, GNorm = 0.0397, lr_0 = 1.2198e-04
Loss = 1.5531e-03, PNorm = 172.9748, GNorm = 0.1258, lr_0 = 1.2189e-04
Loss = 6.7551e-04, PNorm = 172.9769, GNorm = 0.0943, lr_0 = 1.2181e-04
Loss = 1.0386e-03, PNorm = 172.9775, GNorm = 0.0506, lr_0 = 1.2173e-04
Loss = 2.3015e-03, PNorm = 172.9781, GNorm = 0.0943, lr_0 = 1.2164e-04
Loss = 7.9292e-04, PNorm = 172.9801, GNorm = 0.1662, lr_0 = 1.2156e-04
Loss = 1.1318e-03, PNorm = 172.9824, GNorm = 0.0417, lr_0 = 1.2148e-04
Loss = 1.1104e-03, PNorm = 172.9843, GNorm = 0.0539, lr_0 = 1.2139e-04
Loss = 2.9590e-03, PNorm = 172.9857, GNorm = 0.2246, lr_0 = 1.2131e-04
Loss = 1.3483e-03, PNorm = 172.9869, GNorm = 0.1421, lr_0 = 1.2123e-04
Loss = 1.2756e-03, PNorm = 172.9894, GNorm = 0.0685, lr_0 = 1.2114e-04
Loss = 6.4368e-04, PNorm = 172.9904, GNorm = 0.1191, lr_0 = 1.2106e-04
Loss = 6.9849e-04, PNorm = 172.9913, GNorm = 0.1020, lr_0 = 1.2098e-04
Loss = 2.5859e-03, PNorm = 172.9923, GNorm = 0.1281, lr_0 = 1.2090e-04
Loss = 1.1997e-03, PNorm = 172.9944, GNorm = 0.1133, lr_0 = 1.2081e-04
Loss = 1.1030e-03, PNorm = 172.9965, GNorm = 0.0946, lr_0 = 1.2073e-04
Loss = 2.1249e-03, PNorm = 172.9992, GNorm = 0.1334, lr_0 = 1.2065e-04
Loss = 1.7401e-03, PNorm = 173.0013, GNorm = 0.0981, lr_0 = 1.2056e-04
Loss = 8.7704e-04, PNorm = 173.0045, GNorm = 0.1541, lr_0 = 1.2048e-04
Loss = 9.8588e-04, PNorm = 173.0065, GNorm = 0.0862, lr_0 = 1.2040e-04
Loss = 1.9871e-03, PNorm = 173.0077, GNorm = 0.0821, lr_0 = 1.2032e-04
Loss = 2.2171e-03, PNorm = 173.0097, GNorm = 0.1597, lr_0 = 1.2023e-04
Loss = 1.1538e-03, PNorm = 173.0112, GNorm = 0.1062, lr_0 = 1.2015e-04
Loss = 7.3852e-04, PNorm = 173.0122, GNorm = 0.0915, lr_0 = 1.2007e-04
Loss = 6.1705e-04, PNorm = 173.0137, GNorm = 0.0669, lr_0 = 1.1999e-04
Loss = 1.5226e-03, PNorm = 173.0146, GNorm = 0.0692, lr_0 = 1.1991e-04
Loss = 2.4932e-03, PNorm = 173.0154, GNorm = 0.0897, lr_0 = 1.1982e-04
Loss = 1.0505e-03, PNorm = 173.0161, GNorm = 0.0785, lr_0 = 1.1974e-04
Loss = 7.3326e-04, PNorm = 173.0177, GNorm = 0.1009, lr_0 = 1.1966e-04
Loss = 3.7338e-03, PNorm = 173.0194, GNorm = 0.0906, lr_0 = 1.1958e-04
Loss = 2.0614e-03, PNorm = 173.0213, GNorm = 0.1089, lr_0 = 1.1950e-04
Loss = 1.3440e-03, PNorm = 173.0236, GNorm = 0.1220, lr_0 = 1.1941e-04
Loss = 6.4303e-04, PNorm = 173.0260, GNorm = 0.0766, lr_0 = 1.1933e-04
Loss = 1.0182e-03, PNorm = 173.0296, GNorm = 0.0901, lr_0 = 1.1925e-04
Loss = 1.4461e-03, PNorm = 173.0320, GNorm = 0.1122, lr_0 = 1.1917e-04
Loss = 1.6387e-03, PNorm = 173.0337, GNorm = 0.0311, lr_0 = 1.1909e-04
Loss = 1.2626e-03, PNorm = 173.0345, GNorm = 0.1534, lr_0 = 1.1901e-04
Loss = 1.3602e-03, PNorm = 173.0373, GNorm = 0.0516, lr_0 = 1.1892e-04
Loss = 9.0785e-04, PNorm = 173.0390, GNorm = 0.0487, lr_0 = 1.1884e-04
Loss = 1.0361e-03, PNorm = 173.0399, GNorm = 0.0777, lr_0 = 1.1876e-04
Loss = 5.9977e-04, PNorm = 173.0415, GNorm = 0.0599, lr_0 = 1.1868e-04
Loss = 1.8554e-03, PNorm = 173.0419, GNorm = 0.1117, lr_0 = 1.1860e-04
Loss = 7.9248e-04, PNorm = 173.0425, GNorm = 0.0914, lr_0 = 1.1852e-04
Loss = 1.5195e-03, PNorm = 173.0453, GNorm = 0.1190, lr_0 = 1.1844e-04
Loss = 6.5974e-04, PNorm = 173.0475, GNorm = 0.1019, lr_0 = 1.1835e-04
Loss = 9.0847e-04, PNorm = 173.0496, GNorm = 0.0517, lr_0 = 1.1827e-04
Loss = 1.6285e-03, PNorm = 173.0518, GNorm = 0.1519, lr_0 = 1.1819e-04
Loss = 7.9618e-04, PNorm = 173.0541, GNorm = 0.0659, lr_0 = 1.1811e-04
Loss = 7.6163e-04, PNorm = 173.0552, GNorm = 0.1456, lr_0 = 1.1803e-04
Loss = 1.5318e-03, PNorm = 173.0564, GNorm = 0.1421, lr_0 = 1.1795e-04
Loss = 5.6159e-04, PNorm = 173.0574, GNorm = 0.0351, lr_0 = 1.1787e-04
Validation mae = 0.277941
Epoch 28
Loss = 6.2888e-04, PNorm = 173.0586, GNorm = 0.0575, lr_0 = 1.1779e-04
Loss = 1.6609e-03, PNorm = 173.0596, GNorm = 0.2330, lr_0 = 1.1771e-04
Loss = 6.7748e-04, PNorm = 173.0611, GNorm = 0.1196, lr_0 = 1.1763e-04
Loss = 1.4335e-03, PNorm = 173.0617, GNorm = 0.1036, lr_0 = 1.1755e-04
Loss = 5.5153e-04, PNorm = 173.0625, GNorm = 0.0680, lr_0 = 1.1747e-04
Loss = 9.2831e-04, PNorm = 173.0630, GNorm = 0.0711, lr_0 = 1.1739e-04
Loss = 5.2150e-04, PNorm = 173.0636, GNorm = 0.0257, lr_0 = 1.1730e-04
Loss = 1.3688e-03, PNorm = 173.0661, GNorm = 0.1062, lr_0 = 1.1722e-04
Loss = 9.0841e-04, PNorm = 173.0680, GNorm = 0.0433, lr_0 = 1.1714e-04
Loss = 8.0842e-04, PNorm = 173.0679, GNorm = 0.0491, lr_0 = 1.1706e-04
Loss = 6.7705e-04, PNorm = 173.0681, GNorm = 0.0344, lr_0 = 1.1698e-04
Loss = 1.0078e-03, PNorm = 173.0694, GNorm = 0.2741, lr_0 = 1.1690e-04
Loss = 1.0895e-03, PNorm = 173.0705, GNorm = 0.1012, lr_0 = 1.1682e-04
Loss = 8.3166e-04, PNorm = 173.0718, GNorm = 0.0844, lr_0 = 1.1674e-04
Loss = 8.4400e-04, PNorm = 173.0736, GNorm = 0.1161, lr_0 = 1.1666e-04
Loss = 1.0980e-03, PNorm = 173.0750, GNorm = 0.1040, lr_0 = 1.1658e-04
Loss = 7.1946e-04, PNorm = 173.0769, GNorm = 0.0950, lr_0 = 1.1650e-04
Loss = 6.4476e-04, PNorm = 173.0773, GNorm = 0.0955, lr_0 = 1.1642e-04
Loss = 1.0456e-03, PNorm = 173.0787, GNorm = 0.0566, lr_0 = 1.1634e-04
Loss = 1.2950e-03, PNorm = 173.0799, GNorm = 0.0805, lr_0 = 1.1626e-04
Loss = 1.4938e-03, PNorm = 173.0815, GNorm = 0.0811, lr_0 = 1.1618e-04
Loss = 1.2978e-03, PNorm = 173.0812, GNorm = 0.0448, lr_0 = 1.1611e-04
Loss = 1.7520e-03, PNorm = 173.0816, GNorm = 0.0474, lr_0 = 1.1603e-04
Loss = 2.4638e-03, PNorm = 173.0826, GNorm = 0.1110, lr_0 = 1.1595e-04
Loss = 8.0836e-04, PNorm = 173.0837, GNorm = 0.0735, lr_0 = 1.1587e-04
Loss = 1.4914e-03, PNorm = 173.0856, GNorm = 0.0914, lr_0 = 1.1579e-04
Loss = 1.1123e-03, PNorm = 173.0872, GNorm = 0.1285, lr_0 = 1.1571e-04
Loss = 1.3722e-03, PNorm = 173.0882, GNorm = 0.0271, lr_0 = 1.1563e-04
Loss = 7.0983e-04, PNorm = 173.0890, GNorm = 0.1621, lr_0 = 1.1555e-04
Loss = 1.2557e-03, PNorm = 173.0910, GNorm = 0.0576, lr_0 = 1.1547e-04
Loss = 1.1691e-03, PNorm = 173.0927, GNorm = 0.0345, lr_0 = 1.1539e-04
Loss = 2.3106e-03, PNorm = 173.0958, GNorm = 0.1129, lr_0 = 1.1531e-04
Loss = 1.0667e-03, PNorm = 173.0981, GNorm = 0.0718, lr_0 = 1.1523e-04
Loss = 1.4831e-03, PNorm = 173.0993, GNorm = 0.0998, lr_0 = 1.1515e-04
Loss = 6.5424e-04, PNorm = 173.1002, GNorm = 0.0831, lr_0 = 1.1508e-04
Loss = 1.2002e-03, PNorm = 173.0999, GNorm = 0.0774, lr_0 = 1.1500e-04
Loss = 1.3152e-03, PNorm = 173.1021, GNorm = 0.0219, lr_0 = 1.1492e-04
Loss = 2.9025e-03, PNorm = 173.1032, GNorm = 0.1937, lr_0 = 1.1484e-04
Loss = 1.7649e-03, PNorm = 173.1049, GNorm = 0.3787, lr_0 = 1.1476e-04
Loss = 6.9050e-04, PNorm = 173.1059, GNorm = 0.0628, lr_0 = 1.1468e-04
Loss = 7.1698e-04, PNorm = 173.1069, GNorm = 0.1159, lr_0 = 1.1460e-04
Loss = 3.1269e-03, PNorm = 173.1085, GNorm = 0.0976, lr_0 = 1.1452e-04
Loss = 5.7176e-04, PNorm = 173.1092, GNorm = 0.0726, lr_0 = 1.1445e-04
Loss = 6.9547e-04, PNorm = 173.1110, GNorm = 0.1547, lr_0 = 1.1437e-04
Loss = 1.3828e-03, PNorm = 173.1125, GNorm = 0.0244, lr_0 = 1.1429e-04
Loss = 1.9083e-03, PNorm = 173.1146, GNorm = 0.1679, lr_0 = 1.1421e-04
Loss = 7.2052e-04, PNorm = 173.1167, GNorm = 0.0445, lr_0 = 1.1413e-04
Loss = 5.6862e-04, PNorm = 173.1175, GNorm = 0.0932, lr_0 = 1.1405e-04
Loss = 8.6014e-04, PNorm = 173.1177, GNorm = 0.1058, lr_0 = 1.1398e-04
Loss = 1.2397e-03, PNorm = 173.1179, GNorm = 0.0419, lr_0 = 1.1390e-04
Loss = 3.8648e-03, PNorm = 173.1181, GNorm = 0.0839, lr_0 = 1.1382e-04
Loss = 6.3992e-04, PNorm = 173.1194, GNorm = 0.1223, lr_0 = 1.1374e-04
Loss = 1.2993e-03, PNorm = 173.1207, GNorm = 0.0970, lr_0 = 1.1366e-04
Loss = 1.5329e-03, PNorm = 173.1233, GNorm = 0.1761, lr_0 = 1.1359e-04
Loss = 7.2489e-04, PNorm = 173.1249, GNorm = 0.2074, lr_0 = 1.1351e-04
Loss = 5.5043e-04, PNorm = 173.1283, GNorm = 0.0450, lr_0 = 1.1343e-04
Loss = 1.1291e-03, PNorm = 173.1290, GNorm = 0.5405, lr_0 = 1.1335e-04
Loss = 7.8256e-04, PNorm = 173.1286, GNorm = 0.0653, lr_0 = 1.1328e-04
Loss = 1.0784e-03, PNorm = 173.1296, GNorm = 0.1487, lr_0 = 1.1320e-04
Loss = 7.2040e-04, PNorm = 173.1293, GNorm = 0.1266, lr_0 = 1.1312e-04
Loss = 9.0441e-04, PNorm = 173.1298, GNorm = 0.1457, lr_0 = 1.1304e-04
Loss = 1.1930e-03, PNorm = 173.1306, GNorm = 0.1504, lr_0 = 1.1297e-04
Loss = 8.3665e-04, PNorm = 173.1314, GNorm = 0.0590, lr_0 = 1.1289e-04
Loss = 1.0152e-03, PNorm = 173.1314, GNorm = 0.0806, lr_0 = 1.1281e-04
Loss = 1.1950e-03, PNorm = 173.1324, GNorm = 0.0870, lr_0 = 1.1273e-04
Loss = 2.2503e-03, PNorm = 173.1334, GNorm = 0.0547, lr_0 = 1.1266e-04
Loss = 1.9083e-03, PNorm = 173.1366, GNorm = 0.2120, lr_0 = 1.1258e-04
Loss = 1.1632e-03, PNorm = 173.1382, GNorm = 0.0598, lr_0 = 1.1250e-04
Loss = 9.6095e-04, PNorm = 173.1400, GNorm = 0.0235, lr_0 = 1.1243e-04
Loss = 1.1911e-03, PNorm = 173.1408, GNorm = 0.0719, lr_0 = 1.1235e-04
Loss = 1.4609e-03, PNorm = 173.1420, GNorm = 0.0933, lr_0 = 1.1227e-04
Loss = 6.3958e-04, PNorm = 173.1440, GNorm = 0.0832, lr_0 = 1.1219e-04
Loss = 3.6102e-03, PNorm = 173.1450, GNorm = 0.0987, lr_0 = 1.1212e-04
Loss = 1.1251e-03, PNorm = 173.1460, GNorm = 0.1749, lr_0 = 1.1204e-04
Loss = 2.4547e-03, PNorm = 173.1461, GNorm = 0.0699, lr_0 = 1.1196e-04
Loss = 8.0880e-04, PNorm = 173.1467, GNorm = 0.1431, lr_0 = 1.1189e-04
Loss = 5.7578e-04, PNorm = 173.1485, GNorm = 0.0482, lr_0 = 1.1181e-04
Loss = 1.7586e-03, PNorm = 173.1505, GNorm = 0.0200, lr_0 = 1.1173e-04
Loss = 1.2725e-03, PNorm = 173.1536, GNorm = 0.0620, lr_0 = 1.1166e-04
Loss = 1.3911e-03, PNorm = 173.1566, GNorm = 0.1140, lr_0 = 1.1158e-04
Loss = 1.9014e-03, PNorm = 173.1576, GNorm = 0.0646, lr_0 = 1.1150e-04
Loss = 1.5121e-03, PNorm = 173.1576, GNorm = 0.1672, lr_0 = 1.1143e-04
Loss = 8.5709e-04, PNorm = 173.1587, GNorm = 0.0680, lr_0 = 1.1135e-04
Loss = 5.5488e-04, PNorm = 173.1597, GNorm = 0.0705, lr_0 = 1.1128e-04
Loss = 1.5707e-03, PNorm = 173.1607, GNorm = 0.1086, lr_0 = 1.1120e-04
Loss = 6.8036e-04, PNorm = 173.1633, GNorm = 0.0752, lr_0 = 1.1112e-04
Loss = 2.6041e-03, PNorm = 173.1645, GNorm = 0.0705, lr_0 = 1.1105e-04
Loss = 9.3743e-04, PNorm = 173.1648, GNorm = 0.1454, lr_0 = 1.1097e-04
Loss = 2.1034e-03, PNorm = 173.1658, GNorm = 0.0809, lr_0 = 1.1089e-04
Loss = 2.2662e-03, PNorm = 173.1686, GNorm = 0.0854, lr_0 = 1.1082e-04
Loss = 3.8026e-03, PNorm = 173.1707, GNorm = 0.0718, lr_0 = 1.1074e-04
Loss = 7.7040e-04, PNorm = 173.1730, GNorm = 0.0743, lr_0 = 1.1067e-04
Loss = 1.2737e-03, PNorm = 173.1743, GNorm = 0.1647, lr_0 = 1.1059e-04
Loss = 6.1763e-04, PNorm = 173.1765, GNorm = 0.0896, lr_0 = 1.1052e-04
Loss = 7.3547e-04, PNorm = 173.1784, GNorm = 0.0388, lr_0 = 1.1044e-04
Loss = 1.8814e-03, PNorm = 173.1800, GNorm = 0.0402, lr_0 = 1.1036e-04
Loss = 1.3556e-03, PNorm = 173.1820, GNorm = 0.0362, lr_0 = 1.1029e-04
Loss = 9.7678e-04, PNorm = 173.1844, GNorm = 0.1264, lr_0 = 1.1021e-04
Loss = 1.5335e-03, PNorm = 173.1855, GNorm = 0.0467, lr_0 = 1.1014e-04
Loss = 1.8963e-03, PNorm = 173.1863, GNorm = 0.0479, lr_0 = 1.1006e-04
Loss = 1.2676e-03, PNorm = 173.1883, GNorm = 0.0923, lr_0 = 1.0999e-04
Loss = 7.6471e-04, PNorm = 173.1904, GNorm = 0.1742, lr_0 = 1.0991e-04
Loss = 2.4982e-03, PNorm = 173.1918, GNorm = 0.0837, lr_0 = 1.0984e-04
Loss = 1.0425e-03, PNorm = 173.1934, GNorm = 0.0579, lr_0 = 1.0976e-04
Loss = 7.4698e-04, PNorm = 173.1945, GNorm = 0.1156, lr_0 = 1.0969e-04
Loss = 1.6098e-03, PNorm = 173.1961, GNorm = 0.1713, lr_0 = 1.0961e-04
Loss = 1.7218e-03, PNorm = 173.1972, GNorm = 0.0411, lr_0 = 1.0954e-04
Loss = 1.0642e-03, PNorm = 173.1980, GNorm = 0.0486, lr_0 = 1.0946e-04
Loss = 9.9791e-04, PNorm = 173.1990, GNorm = 0.0798, lr_0 = 1.0939e-04
Loss = 2.6249e-03, PNorm = 173.2016, GNorm = 0.0924, lr_0 = 1.0931e-04
Loss = 6.5668e-04, PNorm = 173.2039, GNorm = 0.1734, lr_0 = 1.0924e-04
Loss = 1.4007e-03, PNorm = 173.2064, GNorm = 0.0506, lr_0 = 1.0916e-04
Loss = 6.8631e-04, PNorm = 173.2071, GNorm = 0.1018, lr_0 = 1.0909e-04
Loss = 2.6001e-03, PNorm = 173.2076, GNorm = 0.0797, lr_0 = 1.0901e-04
Loss = 1.7243e-03, PNorm = 173.2086, GNorm = 0.0621, lr_0 = 1.0894e-04
Loss = 9.8580e-04, PNorm = 173.2106, GNorm = 0.1306, lr_0 = 1.0886e-04
Loss = 1.2068e-03, PNorm = 173.2123, GNorm = 0.0860, lr_0 = 1.0879e-04
Loss = 1.0393e-03, PNorm = 173.2137, GNorm = 0.0710, lr_0 = 1.0871e-04
Loss = 9.8278e-04, PNorm = 173.2154, GNorm = 0.0947, lr_0 = 1.0864e-04
Loss = 1.2841e-03, PNorm = 173.2171, GNorm = 0.1206, lr_0 = 1.0856e-04
Validation mae = 0.277877
Epoch 29
Loss = 1.0079e-03, PNorm = 173.2177, GNorm = 0.0881, lr_0 = 1.0849e-04
Loss = 1.3865e-03, PNorm = 173.2192, GNorm = 0.0559, lr_0 = 1.0841e-04
Loss = 5.7396e-04, PNorm = 173.2211, GNorm = 0.0358, lr_0 = 1.0834e-04
Loss = 1.2889e-03, PNorm = 173.2229, GNorm = 0.0646, lr_0 = 1.0827e-04
Loss = 1.4016e-03, PNorm = 173.2233, GNorm = 0.0923, lr_0 = 1.0819e-04
Loss = 8.0779e-04, PNorm = 173.2243, GNorm = 0.1288, lr_0 = 1.0812e-04
Loss = 1.4414e-03, PNorm = 173.2249, GNorm = 0.0918, lr_0 = 1.0804e-04
Loss = 6.1848e-04, PNorm = 173.2259, GNorm = 0.0691, lr_0 = 1.0797e-04
Loss = 1.5399e-03, PNorm = 173.2282, GNorm = 0.1692, lr_0 = 1.0790e-04
Loss = 7.2916e-04, PNorm = 173.2283, GNorm = 0.1640, lr_0 = 1.0782e-04
Loss = 3.0678e-03, PNorm = 173.2284, GNorm = 0.4345, lr_0 = 1.0775e-04
Loss = 6.0308e-04, PNorm = 173.2298, GNorm = 0.0663, lr_0 = 1.0767e-04
Loss = 6.7465e-04, PNorm = 173.2308, GNorm = 0.1379, lr_0 = 1.0760e-04
Loss = 7.1633e-04, PNorm = 173.2333, GNorm = 0.0822, lr_0 = 1.0753e-04
Loss = 1.1555e-03, PNorm = 173.2360, GNorm = 0.0331, lr_0 = 1.0745e-04
Loss = 5.3759e-04, PNorm = 173.2385, GNorm = 0.0810, lr_0 = 1.0738e-04
Loss = 7.7490e-04, PNorm = 173.2399, GNorm = 0.0766, lr_0 = 1.0731e-04
Loss = 5.4333e-04, PNorm = 173.2409, GNorm = 0.0293, lr_0 = 1.0723e-04
Loss = 1.0051e-03, PNorm = 173.2417, GNorm = 0.0527, lr_0 = 1.0716e-04
Loss = 5.7023e-04, PNorm = 173.2431, GNorm = 0.0887, lr_0 = 1.0709e-04
Loss = 7.8700e-04, PNorm = 173.2432, GNorm = 0.0460, lr_0 = 1.0701e-04
Loss = 6.6454e-04, PNorm = 173.2426, GNorm = 0.1235, lr_0 = 1.0694e-04
Loss = 1.4286e-03, PNorm = 173.2428, GNorm = 0.1406, lr_0 = 1.0687e-04
Loss = 9.4765e-04, PNorm = 173.2439, GNorm = 0.0883, lr_0 = 1.0679e-04
Loss = 5.2794e-04, PNorm = 173.2456, GNorm = 0.0502, lr_0 = 1.0672e-04
Loss = 2.1636e-03, PNorm = 173.2483, GNorm = 0.0757, lr_0 = 1.0665e-04
Loss = 9.8050e-04, PNorm = 173.2509, GNorm = 0.0475, lr_0 = 1.0657e-04
Loss = 1.3602e-03, PNorm = 173.2526, GNorm = 0.0821, lr_0 = 1.0650e-04
Loss = 6.5696e-04, PNorm = 173.2539, GNorm = 0.0867, lr_0 = 1.0643e-04
Loss = 7.4916e-04, PNorm = 173.2545, GNorm = 0.0989, lr_0 = 1.0635e-04
Loss = 1.5179e-03, PNorm = 173.2547, GNorm = 0.1194, lr_0 = 1.0628e-04
Loss = 1.7452e-03, PNorm = 173.2546, GNorm = 0.0248, lr_0 = 1.0621e-04
Loss = 1.4624e-03, PNorm = 173.2542, GNorm = 0.0366, lr_0 = 1.0614e-04
Loss = 1.0739e-03, PNorm = 173.2553, GNorm = 0.0552, lr_0 = 1.0606e-04
Loss = 5.3354e-04, PNorm = 173.2561, GNorm = 0.0399, lr_0 = 1.0599e-04
Loss = 9.6827e-04, PNorm = 173.2564, GNorm = 0.1432, lr_0 = 1.0592e-04
Loss = 5.7719e-04, PNorm = 173.2564, GNorm = 0.0654, lr_0 = 1.0585e-04
Loss = 1.2666e-03, PNorm = 173.2575, GNorm = 0.0786, lr_0 = 1.0577e-04
Loss = 1.4503e-03, PNorm = 173.2578, GNorm = 0.0186, lr_0 = 1.0570e-04
Loss = 4.4452e-04, PNorm = 173.2590, GNorm = 0.0437, lr_0 = 1.0563e-04
Loss = 3.7740e-03, PNorm = 173.2586, GNorm = 0.3219, lr_0 = 1.0556e-04
Loss = 1.4698e-03, PNorm = 173.2603, GNorm = 0.0650, lr_0 = 1.0548e-04
Loss = 2.3644e-03, PNorm = 173.2628, GNorm = 0.1182, lr_0 = 1.0541e-04
Loss = 8.1251e-04, PNorm = 173.2653, GNorm = 0.0643, lr_0 = 1.0534e-04
Loss = 2.1823e-03, PNorm = 173.2671, GNorm = 0.2017, lr_0 = 1.0527e-04
Loss = 2.0582e-03, PNorm = 173.2679, GNorm = 0.2395, lr_0 = 1.0519e-04
Loss = 5.2527e-04, PNorm = 173.2672, GNorm = 0.0886, lr_0 = 1.0512e-04
Loss = 1.4982e-03, PNorm = 173.2668, GNorm = 0.0378, lr_0 = 1.0505e-04
Loss = 7.9189e-04, PNorm = 173.2677, GNorm = 0.0768, lr_0 = 1.0498e-04
Loss = 1.0136e-03, PNorm = 173.2698, GNorm = 0.0647, lr_0 = 1.0491e-04
Loss = 1.6423e-03, PNorm = 173.2708, GNorm = 0.2094, lr_0 = 1.0483e-04
Loss = 1.4158e-03, PNorm = 173.2724, GNorm = 0.0479, lr_0 = 1.0476e-04
Loss = 7.6271e-04, PNorm = 173.2730, GNorm = 0.0495, lr_0 = 1.0469e-04
Loss = 1.5521e-03, PNorm = 173.2733, GNorm = 0.0302, lr_0 = 1.0462e-04
Loss = 4.5847e-03, PNorm = 173.2727, GNorm = 0.1783, lr_0 = 1.0455e-04
Loss = 8.1397e-04, PNorm = 173.2737, GNorm = 0.1698, lr_0 = 1.0448e-04
Loss = 2.2512e-03, PNorm = 173.2743, GNorm = 0.0503, lr_0 = 1.0440e-04
Loss = 1.2536e-03, PNorm = 173.2748, GNorm = 0.1056, lr_0 = 1.0433e-04
Loss = 1.0362e-03, PNorm = 173.2756, GNorm = 0.1234, lr_0 = 1.0426e-04
Loss = 6.2883e-04, PNorm = 173.2767, GNorm = 0.1543, lr_0 = 1.0419e-04
Loss = 6.1204e-04, PNorm = 173.2789, GNorm = 0.0819, lr_0 = 1.0412e-04
Loss = 2.4720e-03, PNorm = 173.2816, GNorm = 0.0594, lr_0 = 1.0405e-04
Loss = 6.3010e-04, PNorm = 173.2842, GNorm = 0.0260, lr_0 = 1.0398e-04
Loss = 2.6195e-03, PNorm = 173.2854, GNorm = 0.0736, lr_0 = 1.0391e-04
Loss = 2.0880e-03, PNorm = 173.2866, GNorm = 0.0371, lr_0 = 1.0383e-04
Loss = 7.1915e-04, PNorm = 173.2872, GNorm = 0.0916, lr_0 = 1.0376e-04
Loss = 6.1572e-04, PNorm = 173.2881, GNorm = 0.0840, lr_0 = 1.0369e-04
Loss = 6.8759e-04, PNorm = 173.2889, GNorm = 0.2479, lr_0 = 1.0362e-04
Loss = 4.9842e-04, PNorm = 173.2893, GNorm = 0.1319, lr_0 = 1.0355e-04
Loss = 7.0839e-04, PNorm = 173.2903, GNorm = 0.0421, lr_0 = 1.0348e-04
Loss = 5.2639e-04, PNorm = 173.2917, GNorm = 0.0257, lr_0 = 1.0341e-04
Loss = 5.0852e-04, PNorm = 173.2935, GNorm = 0.0724, lr_0 = 1.0334e-04
Loss = 8.6201e-04, PNorm = 173.2942, GNorm = 0.1078, lr_0 = 1.0327e-04
Loss = 1.2124e-03, PNorm = 173.2941, GNorm = 0.0369, lr_0 = 1.0320e-04
Loss = 2.7136e-03, PNorm = 173.2934, GNorm = 0.0665, lr_0 = 1.0312e-04
Loss = 7.5424e-04, PNorm = 173.2940, GNorm = 0.0640, lr_0 = 1.0305e-04
Loss = 5.5438e-04, PNorm = 173.2962, GNorm = 0.1051, lr_0 = 1.0298e-04
Loss = 6.7342e-04, PNorm = 173.2987, GNorm = 0.0377, lr_0 = 1.0291e-04
Loss = 2.1070e-03, PNorm = 173.3002, GNorm = 0.0273, lr_0 = 1.0284e-04
Loss = 1.8076e-03, PNorm = 173.3008, GNorm = 0.2727, lr_0 = 1.0277e-04
Loss = 5.8922e-04, PNorm = 173.3022, GNorm = 0.0398, lr_0 = 1.0270e-04
Loss = 1.0959e-03, PNorm = 173.3027, GNorm = 0.1386, lr_0 = 1.0263e-04
Loss = 6.4839e-04, PNorm = 173.3033, GNorm = 0.0606, lr_0 = 1.0256e-04
Loss = 1.7030e-03, PNorm = 173.3056, GNorm = 0.0376, lr_0 = 1.0249e-04
Loss = 5.2552e-04, PNorm = 173.3070, GNorm = 0.0804, lr_0 = 1.0242e-04
Loss = 1.0108e-03, PNorm = 173.3088, GNorm = 0.1034, lr_0 = 1.0235e-04
Loss = 1.8720e-03, PNorm = 173.3095, GNorm = 0.1157, lr_0 = 1.0228e-04
Loss = 6.3965e-04, PNorm = 173.3105, GNorm = 0.1310, lr_0 = 1.0221e-04
Loss = 5.1047e-04, PNorm = 173.3133, GNorm = 0.1123, lr_0 = 1.0214e-04
Loss = 3.2004e-03, PNorm = 173.3151, GNorm = 0.1382, lr_0 = 1.0207e-04
Loss = 1.2604e-03, PNorm = 173.3181, GNorm = 0.1022, lr_0 = 1.0200e-04
Loss = 1.0152e-03, PNorm = 173.3199, GNorm = 0.0962, lr_0 = 1.0193e-04
Loss = 9.9105e-04, PNorm = 173.3205, GNorm = 0.0823, lr_0 = 1.0186e-04
Loss = 5.3999e-04, PNorm = 173.3218, GNorm = 0.0850, lr_0 = 1.0179e-04
Loss = 9.8744e-04, PNorm = 173.3231, GNorm = 0.0493, lr_0 = 1.0172e-04
Loss = 1.8008e-03, PNorm = 173.3254, GNorm = 0.0370, lr_0 = 1.0165e-04
Loss = 5.3870e-04, PNorm = 173.3277, GNorm = 0.0374, lr_0 = 1.0158e-04
Loss = 5.3613e-04, PNorm = 173.3290, GNorm = 0.0401, lr_0 = 1.0151e-04
Loss = 1.1641e-03, PNorm = 173.3312, GNorm = 0.0825, lr_0 = 1.0144e-04
Loss = 2.3146e-03, PNorm = 173.3334, GNorm = 0.0671, lr_0 = 1.0137e-04
Loss = 2.4920e-03, PNorm = 173.3348, GNorm = 0.0600, lr_0 = 1.0130e-04
Loss = 5.2116e-04, PNorm = 173.3348, GNorm = 0.0857, lr_0 = 1.0123e-04
Loss = 6.0315e-04, PNorm = 173.3347, GNorm = 0.0867, lr_0 = 1.0116e-04
Loss = 1.3891e-03, PNorm = 173.3346, GNorm = 0.1527, lr_0 = 1.0110e-04
Loss = 1.0254e-03, PNorm = 173.3364, GNorm = 0.0643, lr_0 = 1.0103e-04
Loss = 4.4820e-04, PNorm = 173.3383, GNorm = 0.0588, lr_0 = 1.0096e-04
Loss = 4.7681e-04, PNorm = 173.3401, GNorm = 0.0513, lr_0 = 1.0089e-04
Loss = 1.3640e-03, PNorm = 173.3412, GNorm = 0.0369, lr_0 = 1.0082e-04
Loss = 1.4681e-03, PNorm = 173.3413, GNorm = 0.0745, lr_0 = 1.0075e-04
Loss = 8.1819e-04, PNorm = 173.3410, GNorm = 0.0543, lr_0 = 1.0068e-04
Loss = 5.4113e-04, PNorm = 173.3405, GNorm = 0.0613, lr_0 = 1.0061e-04
Loss = 1.2594e-03, PNorm = 173.3423, GNorm = 0.0509, lr_0 = 1.0054e-04
Loss = 1.1437e-03, PNorm = 173.3448, GNorm = 0.1489, lr_0 = 1.0047e-04
Loss = 8.4679e-04, PNorm = 173.3458, GNorm = 0.0443, lr_0 = 1.0041e-04
Loss = 8.1293e-04, PNorm = 173.3468, GNorm = 0.0373, lr_0 = 1.0034e-04
Loss = 6.5284e-04, PNorm = 173.3466, GNorm = 0.0633, lr_0 = 1.0027e-04
Loss = 1.5303e-03, PNorm = 173.3471, GNorm = 0.1189, lr_0 = 1.0020e-04
Loss = 9.7222e-04, PNorm = 173.3485, GNorm = 0.1165, lr_0 = 1.0013e-04
Loss = 2.8292e-03, PNorm = 173.3501, GNorm = 0.0473, lr_0 = 1.0006e-04
Loss = 2.7284e-03, PNorm = 173.3519, GNorm = 0.0215, lr_0 = 1.0000e-04
Validation mae = 0.278206
Model 0 best validation mae = 0.277877 on epoch 28
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.273563
Ensemble test mae = 0.273563
Fold 4
Splitting data with seed 4
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=2200, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=2200, out_features=2200, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=2200, out_features=1, bias=True)
  )
)
Number of parameters = 9,352,201
Moving model to cuda
Epoch 0
Loss = 8.7075e-01, PNorm = 65.7658, GNorm = 1.7583, lr_0 = 1.0413e-04
Loss = 6.8019e-01, PNorm = 65.7779, GNorm = 2.1724, lr_0 = 1.0788e-04
Loss = 5.6751e-01, PNorm = 65.7918, GNorm = 2.9226, lr_0 = 1.1163e-04
Loss = 4.8237e-01, PNorm = 65.8034, GNorm = 4.1761, lr_0 = 1.1537e-04
Loss = 4.8416e-01, PNorm = 65.8124, GNorm = 3.5119, lr_0 = 1.1913e-04
Loss = 4.4608e-01, PNorm = 65.8209, GNorm = 3.0653, lr_0 = 1.2287e-04
Loss = 3.8702e-01, PNorm = 65.8306, GNorm = 1.6537, lr_0 = 1.2663e-04
Loss = 4.3281e-01, PNorm = 65.8401, GNorm = 1.7804, lr_0 = 1.3038e-04
Loss = 4.5340e-01, PNorm = 65.8481, GNorm = 3.3566, lr_0 = 1.3413e-04
Loss = 4.2851e-01, PNorm = 65.8568, GNorm = 1.8882, lr_0 = 1.3788e-04
Loss = 4.2091e-01, PNorm = 65.8677, GNorm = 2.0698, lr_0 = 1.4163e-04
Loss = 3.3527e-01, PNorm = 65.8802, GNorm = 2.8308, lr_0 = 1.4537e-04
Loss = 3.6767e-01, PNorm = 65.8916, GNorm = 1.9648, lr_0 = 1.4913e-04
Loss = 3.4152e-01, PNorm = 65.9010, GNorm = 2.6340, lr_0 = 1.5288e-04
Loss = 3.7127e-01, PNorm = 65.9105, GNorm = 2.0067, lr_0 = 1.5662e-04
Loss = 3.7693e-01, PNorm = 65.9205, GNorm = 2.3087, lr_0 = 1.6038e-04
Loss = 3.4777e-01, PNorm = 65.9305, GNorm = 2.3455, lr_0 = 1.6412e-04
Loss = 4.0938e-01, PNorm = 65.9436, GNorm = 1.5616, lr_0 = 1.6788e-04
Loss = 3.2882e-01, PNorm = 65.9587, GNorm = 2.1102, lr_0 = 1.7163e-04
Loss = 3.4493e-01, PNorm = 65.9726, GNorm = 2.0698, lr_0 = 1.7538e-04
Loss = 3.0276e-01, PNorm = 65.9851, GNorm = 1.9312, lr_0 = 1.7913e-04
Loss = 3.3073e-01, PNorm = 65.9987, GNorm = 1.7801, lr_0 = 1.8288e-04
Loss = 3.5714e-01, PNorm = 66.0139, GNorm = 3.0239, lr_0 = 1.8662e-04
Loss = 3.3130e-01, PNorm = 66.0269, GNorm = 1.5598, lr_0 = 1.9038e-04
Loss = 3.1043e-01, PNorm = 66.0407, GNorm = 1.3961, lr_0 = 1.9413e-04
Loss = 2.6526e-01, PNorm = 66.0583, GNorm = 1.2040, lr_0 = 1.9788e-04
Loss = 3.3679e-01, PNorm = 66.0719, GNorm = 2.4227, lr_0 = 2.0163e-04
Loss = 3.4100e-01, PNorm = 66.0873, GNorm = 2.0482, lr_0 = 2.0537e-04
Loss = 3.1394e-01, PNorm = 66.1049, GNorm = 2.1609, lr_0 = 2.0913e-04
Loss = 2.9390e-01, PNorm = 66.1211, GNorm = 1.7726, lr_0 = 2.1288e-04
Loss = 3.2331e-01, PNorm = 66.1417, GNorm = 2.5093, lr_0 = 2.1663e-04
Loss = 3.2220e-01, PNorm = 66.1577, GNorm = 1.8322, lr_0 = 2.2038e-04
Loss = 3.4547e-01, PNorm = 66.1780, GNorm = 1.3744, lr_0 = 2.2412e-04
Loss = 3.2862e-01, PNorm = 66.1958, GNorm = 1.5519, lr_0 = 2.2787e-04
Loss = 3.7340e-01, PNorm = 66.2142, GNorm = 2.8691, lr_0 = 2.3163e-04
Loss = 2.9265e-01, PNorm = 66.2336, GNorm = 1.8813, lr_0 = 2.3538e-04
Loss = 3.1280e-01, PNorm = 66.2529, GNorm = 1.4330, lr_0 = 2.3913e-04
Loss = 3.4447e-01, PNorm = 66.2737, GNorm = 2.1093, lr_0 = 2.4288e-04
Loss = 3.4061e-01, PNorm = 66.2931, GNorm = 1.4296, lr_0 = 2.4662e-04
Loss = 3.1105e-01, PNorm = 66.3153, GNorm = 1.2490, lr_0 = 2.5038e-04
Loss = 2.7364e-01, PNorm = 66.3377, GNorm = 2.2697, lr_0 = 2.5413e-04
Loss = 2.6571e-01, PNorm = 66.3551, GNorm = 1.7172, lr_0 = 2.5788e-04
Loss = 3.0174e-01, PNorm = 66.3731, GNorm = 2.5227, lr_0 = 2.6163e-04
Loss = 2.7975e-01, PNorm = 66.3939, GNorm = 1.4074, lr_0 = 2.6537e-04
Loss = 2.9833e-01, PNorm = 66.4168, GNorm = 2.2378, lr_0 = 2.6912e-04
Loss = 2.9254e-01, PNorm = 66.4380, GNorm = 1.5519, lr_0 = 2.7288e-04
Loss = 3.2932e-01, PNorm = 66.4604, GNorm = 1.4964, lr_0 = 2.7663e-04
Loss = 2.7983e-01, PNorm = 66.4846, GNorm = 1.9129, lr_0 = 2.8038e-04
Loss = 2.6274e-01, PNorm = 66.5094, GNorm = 1.1001, lr_0 = 2.8413e-04
Loss = 2.8396e-01, PNorm = 66.5316, GNorm = 2.0086, lr_0 = 2.8787e-04
Loss = 2.8250e-01, PNorm = 66.5581, GNorm = 1.5008, lr_0 = 2.9163e-04
Loss = 3.2399e-01, PNorm = 66.5817, GNorm = 1.5478, lr_0 = 2.9538e-04
Loss = 2.8479e-01, PNorm = 66.6118, GNorm = 1.5375, lr_0 = 2.9913e-04
Loss = 3.0031e-01, PNorm = 66.6439, GNorm = 1.1138, lr_0 = 3.0288e-04
Loss = 3.0113e-01, PNorm = 66.6669, GNorm = 0.8153, lr_0 = 3.0662e-04
Loss = 3.1105e-01, PNorm = 66.7003, GNorm = 1.1245, lr_0 = 3.1037e-04
Loss = 2.9286e-01, PNorm = 66.7263, GNorm = 0.9733, lr_0 = 3.1413e-04
Loss = 2.6371e-01, PNorm = 66.7533, GNorm = 1.2875, lr_0 = 3.1788e-04
Loss = 2.9340e-01, PNorm = 66.7843, GNorm = 1.2123, lr_0 = 3.2163e-04
Loss = 3.0110e-01, PNorm = 66.8100, GNorm = 2.1822, lr_0 = 3.2538e-04
Loss = 3.2418e-01, PNorm = 66.8437, GNorm = 1.4749, lr_0 = 3.2912e-04
Loss = 2.5556e-01, PNorm = 66.8770, GNorm = 1.2055, lr_0 = 3.3288e-04
Loss = 3.0778e-01, PNorm = 66.9125, GNorm = 1.0059, lr_0 = 3.3663e-04
Loss = 2.7912e-01, PNorm = 66.9399, GNorm = 1.0361, lr_0 = 3.4038e-04
Loss = 3.1917e-01, PNorm = 66.9733, GNorm = 1.5172, lr_0 = 3.4413e-04
Loss = 2.5617e-01, PNorm = 67.0037, GNorm = 1.7060, lr_0 = 3.4787e-04
Loss = 3.1277e-01, PNorm = 67.0340, GNorm = 1.1949, lr_0 = 3.5162e-04
Loss = 2.6722e-01, PNorm = 67.0675, GNorm = 1.3572, lr_0 = 3.5538e-04
Loss = 2.7867e-01, PNorm = 67.1011, GNorm = 0.9350, lr_0 = 3.5913e-04
Loss = 2.5677e-01, PNorm = 67.1290, GNorm = 1.1251, lr_0 = 3.6288e-04
Loss = 2.6081e-01, PNorm = 67.1618, GNorm = 1.4429, lr_0 = 3.6662e-04
Loss = 2.9202e-01, PNorm = 67.1945, GNorm = 1.0570, lr_0 = 3.7037e-04
Loss = 2.9845e-01, PNorm = 67.2316, GNorm = 2.0878, lr_0 = 3.7413e-04
Loss = 3.1509e-01, PNorm = 67.2662, GNorm = 0.9989, lr_0 = 3.7788e-04
Loss = 2.5572e-01, PNorm = 67.3030, GNorm = 1.3300, lr_0 = 3.8163e-04
Loss = 2.8975e-01, PNorm = 67.3391, GNorm = 1.0247, lr_0 = 3.8537e-04
Loss = 3.1975e-01, PNorm = 67.3791, GNorm = 1.0469, lr_0 = 3.8912e-04
Loss = 2.8674e-01, PNorm = 67.4282, GNorm = 0.9876, lr_0 = 3.9287e-04
Loss = 2.8100e-01, PNorm = 67.4717, GNorm = 0.9542, lr_0 = 3.9663e-04
Loss = 2.6302e-01, PNorm = 67.5184, GNorm = 1.2620, lr_0 = 4.0038e-04
Loss = 2.5927e-01, PNorm = 67.5561, GNorm = 1.6678, lr_0 = 4.0413e-04
Loss = 2.7495e-01, PNorm = 67.5962, GNorm = 1.0387, lr_0 = 4.0787e-04
Loss = 2.5543e-01, PNorm = 67.6329, GNorm = 1.0403, lr_0 = 4.1162e-04
Loss = 2.7100e-01, PNorm = 67.6702, GNorm = 0.9499, lr_0 = 4.1537e-04
Loss = 2.8229e-01, PNorm = 67.7108, GNorm = 1.0691, lr_0 = 4.1913e-04
Loss = 2.2288e-01, PNorm = 67.7552, GNorm = 0.9351, lr_0 = 4.2288e-04
Loss = 2.5967e-01, PNorm = 67.7964, GNorm = 1.0641, lr_0 = 4.2662e-04
Loss = 2.5713e-01, PNorm = 67.8376, GNorm = 1.2594, lr_0 = 4.3037e-04
Loss = 2.6486e-01, PNorm = 67.8779, GNorm = 1.1963, lr_0 = 4.3412e-04
Loss = 2.2865e-01, PNorm = 67.9166, GNorm = 0.9061, lr_0 = 4.3788e-04
Loss = 2.9475e-01, PNorm = 67.9551, GNorm = 1.2597, lr_0 = 4.4163e-04
Loss = 3.0412e-01, PNorm = 68.0047, GNorm = 1.3325, lr_0 = 4.4538e-04
Loss = 2.7373e-01, PNorm = 68.0503, GNorm = 1.0814, lr_0 = 4.4912e-04
Loss = 2.9820e-01, PNorm = 68.1082, GNorm = 1.0889, lr_0 = 4.5287e-04
Loss = 2.7011e-01, PNorm = 68.1558, GNorm = 1.3885, lr_0 = 4.5662e-04
Loss = 2.6367e-01, PNorm = 68.2090, GNorm = 0.8879, lr_0 = 4.6038e-04
Loss = 2.3224e-01, PNorm = 68.2626, GNorm = 1.0306, lr_0 = 4.6413e-04
Loss = 2.5901e-01, PNorm = 68.3098, GNorm = 1.3078, lr_0 = 4.6787e-04
Loss = 2.7511e-01, PNorm = 68.3639, GNorm = 0.9685, lr_0 = 4.7162e-04
Loss = 2.6724e-01, PNorm = 68.4159, GNorm = 1.1296, lr_0 = 4.7537e-04
Loss = 2.3936e-01, PNorm = 68.4698, GNorm = 1.3713, lr_0 = 4.7913e-04
Loss = 2.4172e-01, PNorm = 68.5194, GNorm = 0.7220, lr_0 = 4.8288e-04
Loss = 2.5838e-01, PNorm = 68.5701, GNorm = 0.8727, lr_0 = 4.8663e-04
Loss = 2.6443e-01, PNorm = 68.6256, GNorm = 0.9324, lr_0 = 4.9038e-04
Loss = 2.6415e-01, PNorm = 68.6800, GNorm = 1.1359, lr_0 = 4.9412e-04
Loss = 2.3751e-01, PNorm = 68.7318, GNorm = 0.9239, lr_0 = 4.9788e-04
Loss = 2.2025e-01, PNorm = 68.7844, GNorm = 0.8672, lr_0 = 5.0163e-04
Loss = 2.6412e-01, PNorm = 68.8378, GNorm = 0.9658, lr_0 = 5.0538e-04
Loss = 2.5363e-01, PNorm = 68.8969, GNorm = 1.1585, lr_0 = 5.0913e-04
Loss = 2.8690e-01, PNorm = 68.9563, GNorm = 0.9629, lr_0 = 5.1287e-04
Loss = 2.5972e-01, PNorm = 69.0216, GNorm = 1.1667, lr_0 = 5.1663e-04
Loss = 2.5879e-01, PNorm = 69.0746, GNorm = 1.2980, lr_0 = 5.2038e-04
Loss = 2.7601e-01, PNorm = 69.1347, GNorm = 1.3104, lr_0 = 5.2413e-04
Loss = 2.5924e-01, PNorm = 69.1910, GNorm = 0.8126, lr_0 = 5.2788e-04
Loss = 2.6137e-01, PNorm = 69.2539, GNorm = 1.0719, lr_0 = 5.3162e-04
Loss = 2.7230e-01, PNorm = 69.3141, GNorm = 1.3427, lr_0 = 5.3538e-04
Loss = 2.4749e-01, PNorm = 69.3785, GNorm = 1.0918, lr_0 = 5.3912e-04
Loss = 2.3511e-01, PNorm = 69.4369, GNorm = 0.8254, lr_0 = 5.4288e-04
Loss = 2.3809e-01, PNorm = 69.4944, GNorm = 0.8967, lr_0 = 5.4663e-04
Loss = 2.7217e-01, PNorm = 69.5600, GNorm = 1.0105, lr_0 = 5.5038e-04
Validation mae = 0.319743
Epoch 1
Loss = 1.7125e-01, PNorm = 69.6267, GNorm = 0.9546, lr_0 = 5.5413e-04
Loss = 1.5500e-01, PNorm = 69.6824, GNorm = 1.0011, lr_0 = 5.5787e-04
Loss = 1.7924e-01, PNorm = 69.7329, GNorm = 1.1376, lr_0 = 5.6163e-04
Loss = 1.9329e-01, PNorm = 69.7853, GNorm = 1.0069, lr_0 = 5.6538e-04
Loss = 1.8753e-01, PNorm = 69.8453, GNorm = 1.0078, lr_0 = 5.6913e-04
Loss = 1.6148e-01, PNorm = 69.9049, GNorm = 0.8163, lr_0 = 5.7288e-04
Loss = 1.8220e-01, PNorm = 69.9610, GNorm = 0.8066, lr_0 = 5.7662e-04
Loss = 1.5935e-01, PNorm = 70.0334, GNorm = 0.7558, lr_0 = 5.8038e-04
Loss = 1.4598e-01, PNorm = 70.0933, GNorm = 0.9355, lr_0 = 5.8413e-04
Loss = 1.7818e-01, PNorm = 70.1460, GNorm = 0.8791, lr_0 = 5.8788e-04
Loss = 1.5826e-01, PNorm = 70.2265, GNorm = 0.8879, lr_0 = 5.9163e-04
Loss = 1.5186e-01, PNorm = 70.2957, GNorm = 0.8917, lr_0 = 5.9538e-04
Loss = 1.8350e-01, PNorm = 70.3668, GNorm = 0.7986, lr_0 = 5.9913e-04
Loss = 1.7669e-01, PNorm = 70.4394, GNorm = 0.7006, lr_0 = 6.0288e-04
Loss = 1.7742e-01, PNorm = 70.5071, GNorm = 0.6211, lr_0 = 6.0663e-04
Loss = 1.7169e-01, PNorm = 70.5781, GNorm = 1.0144, lr_0 = 6.1038e-04
Loss = 1.8406e-01, PNorm = 70.6474, GNorm = 0.7415, lr_0 = 6.1413e-04
Loss = 1.7720e-01, PNorm = 70.7276, GNorm = 0.7467, lr_0 = 6.1788e-04
Loss = 1.8163e-01, PNorm = 70.8097, GNorm = 0.8378, lr_0 = 6.2163e-04
Loss = 1.8949e-01, PNorm = 70.8932, GNorm = 0.8281, lr_0 = 6.2538e-04
Loss = 1.8577e-01, PNorm = 70.9777, GNorm = 1.0077, lr_0 = 6.2913e-04
Loss = 1.9525e-01, PNorm = 71.0614, GNorm = 0.7455, lr_0 = 6.3288e-04
Loss = 1.9329e-01, PNorm = 71.1525, GNorm = 0.8532, lr_0 = 6.3663e-04
Loss = 1.7809e-01, PNorm = 71.2349, GNorm = 1.0349, lr_0 = 6.4038e-04
Loss = 2.1245e-01, PNorm = 71.3318, GNorm = 0.9559, lr_0 = 6.4413e-04
Loss = 2.0191e-01, PNorm = 71.4277, GNorm = 0.5666, lr_0 = 6.4788e-04
Loss = 1.8137e-01, PNorm = 71.5327, GNorm = 0.6553, lr_0 = 6.5163e-04
Loss = 1.9612e-01, PNorm = 71.6201, GNorm = 0.6837, lr_0 = 6.5538e-04
Loss = 1.7919e-01, PNorm = 71.7166, GNorm = 0.6988, lr_0 = 6.5913e-04
Loss = 1.6498e-01, PNorm = 71.8112, GNorm = 0.9046, lr_0 = 6.6288e-04
Loss = 1.7625e-01, PNorm = 71.8905, GNorm = 0.9308, lr_0 = 6.6663e-04
Loss = 1.9113e-01, PNorm = 71.9829, GNorm = 0.7073, lr_0 = 6.7038e-04
Loss = 2.1186e-01, PNorm = 72.0861, GNorm = 0.9041, lr_0 = 6.7413e-04
Loss = 2.1080e-01, PNorm = 72.1924, GNorm = 0.9841, lr_0 = 6.7788e-04
Loss = 1.8537e-01, PNorm = 72.2917, GNorm = 0.8464, lr_0 = 6.8163e-04
Loss = 1.8046e-01, PNorm = 72.3960, GNorm = 0.6892, lr_0 = 6.8538e-04
Loss = 1.9114e-01, PNorm = 72.4894, GNorm = 0.7733, lr_0 = 6.8913e-04
Loss = 1.9466e-01, PNorm = 72.5854, GNorm = 1.2949, lr_0 = 6.9288e-04
Loss = 1.8962e-01, PNorm = 72.6847, GNorm = 1.1075, lr_0 = 6.9663e-04
Loss = 1.8850e-01, PNorm = 72.7857, GNorm = 1.1927, lr_0 = 7.0038e-04
Loss = 1.6910e-01, PNorm = 72.8885, GNorm = 1.0332, lr_0 = 7.0413e-04
Loss = 2.0274e-01, PNorm = 72.9867, GNorm = 1.1007, lr_0 = 7.0788e-04
Loss = 1.9684e-01, PNorm = 73.0831, GNorm = 0.6585, lr_0 = 7.1163e-04
Loss = 1.8383e-01, PNorm = 73.1872, GNorm = 0.7493, lr_0 = 7.1538e-04
Loss = 1.7676e-01, PNorm = 73.2901, GNorm = 0.6943, lr_0 = 7.1913e-04
Loss = 1.9333e-01, PNorm = 73.3882, GNorm = 0.7055, lr_0 = 7.2288e-04
Loss = 2.5010e-01, PNorm = 73.4840, GNorm = 0.7936, lr_0 = 7.2663e-04
Loss = 2.2099e-01, PNorm = 73.6050, GNorm = 1.0610, lr_0 = 7.3038e-04
Loss = 1.8940e-01, PNorm = 73.7154, GNorm = 0.9641, lr_0 = 7.3413e-04
Loss = 1.8349e-01, PNorm = 73.8316, GNorm = 0.9038, lr_0 = 7.3788e-04
Loss = 2.0650e-01, PNorm = 73.9498, GNorm = 0.9691, lr_0 = 7.4163e-04
Loss = 2.1680e-01, PNorm = 74.0681, GNorm = 0.8692, lr_0 = 7.4538e-04
Loss = 2.0715e-01, PNorm = 74.1911, GNorm = 0.9655, lr_0 = 7.4913e-04
Loss = 2.0012e-01, PNorm = 74.3121, GNorm = 0.6218, lr_0 = 7.5288e-04
Loss = 2.0847e-01, PNorm = 74.4333, GNorm = 0.8442, lr_0 = 7.5663e-04
Loss = 2.0165e-01, PNorm = 74.5485, GNorm = 0.7920, lr_0 = 7.6038e-04
Loss = 2.1841e-01, PNorm = 74.6654, GNorm = 0.8969, lr_0 = 7.6413e-04
Loss = 2.0110e-01, PNorm = 74.7844, GNorm = 0.8646, lr_0 = 7.6788e-04
Loss = 2.1024e-01, PNorm = 74.9043, GNorm = 0.8741, lr_0 = 7.7163e-04
Loss = 1.8433e-01, PNorm = 75.0205, GNorm = 0.6795, lr_0 = 7.7538e-04
Loss = 1.9010e-01, PNorm = 75.1361, GNorm = 1.1876, lr_0 = 7.7913e-04
Loss = 1.8545e-01, PNorm = 75.2414, GNorm = 0.8937, lr_0 = 7.8288e-04
Loss = 1.7920e-01, PNorm = 75.3579, GNorm = 0.6585, lr_0 = 7.8663e-04
Loss = 1.7296e-01, PNorm = 75.4654, GNorm = 0.7521, lr_0 = 7.9038e-04
Loss = 2.0025e-01, PNorm = 75.5755, GNorm = 0.8372, lr_0 = 7.9413e-04
Loss = 1.9852e-01, PNorm = 75.6885, GNorm = 0.9823, lr_0 = 7.9788e-04
Loss = 2.1353e-01, PNorm = 75.8106, GNorm = 1.1195, lr_0 = 8.0163e-04
Loss = 2.0226e-01, PNorm = 75.9429, GNorm = 0.8250, lr_0 = 8.0538e-04
Loss = 2.1657e-01, PNorm = 76.0657, GNorm = 0.8929, lr_0 = 8.0913e-04
Loss = 2.0473e-01, PNorm = 76.1967, GNorm = 1.0206, lr_0 = 8.1288e-04
Loss = 2.1341e-01, PNorm = 76.3197, GNorm = 0.8694, lr_0 = 8.1663e-04
Loss = 1.9575e-01, PNorm = 76.4412, GNorm = 1.0242, lr_0 = 8.2038e-04
Loss = 1.8824e-01, PNorm = 76.5652, GNorm = 0.6727, lr_0 = 8.2413e-04
Loss = 1.9437e-01, PNorm = 76.6852, GNorm = 1.2505, lr_0 = 8.2788e-04
Loss = 1.9647e-01, PNorm = 76.7973, GNorm = 0.7559, lr_0 = 8.3163e-04
Loss = 1.8032e-01, PNorm = 76.9080, GNorm = 0.7163, lr_0 = 8.3538e-04
Loss = 2.1691e-01, PNorm = 77.0176, GNorm = 1.0729, lr_0 = 8.3913e-04
Loss = 2.0697e-01, PNorm = 77.1479, GNorm = 1.2436, lr_0 = 8.4288e-04
Loss = 2.1926e-01, PNorm = 77.2859, GNorm = 0.9495, lr_0 = 8.4663e-04
Loss = 2.0650e-01, PNorm = 77.4198, GNorm = 0.8598, lr_0 = 8.5038e-04
Loss = 2.0097e-01, PNorm = 77.5401, GNorm = 1.0168, lr_0 = 8.5413e-04
Loss = 1.9288e-01, PNorm = 77.6659, GNorm = 0.8000, lr_0 = 8.5788e-04
Loss = 1.8686e-01, PNorm = 77.7907, GNorm = 0.6969, lr_0 = 8.6163e-04
Loss = 2.2530e-01, PNorm = 77.9090, GNorm = 0.8754, lr_0 = 8.6538e-04
Loss = 2.1067e-01, PNorm = 78.0373, GNorm = 0.9026, lr_0 = 8.6913e-04
Loss = 2.0872e-01, PNorm = 78.1714, GNorm = 0.8235, lr_0 = 8.7288e-04
Loss = 2.3943e-01, PNorm = 78.3083, GNorm = 1.1031, lr_0 = 8.7663e-04
Loss = 2.2661e-01, PNorm = 78.4419, GNorm = 0.6617, lr_0 = 8.8038e-04
Loss = 1.9965e-01, PNorm = 78.6001, GNorm = 1.2851, lr_0 = 8.8413e-04
Loss = 2.2022e-01, PNorm = 78.7405, GNorm = 0.9848, lr_0 = 8.8788e-04
Loss = 2.0293e-01, PNorm = 78.8953, GNorm = 0.6220, lr_0 = 8.9163e-04
Loss = 1.9651e-01, PNorm = 79.0274, GNorm = 0.8532, lr_0 = 8.9538e-04
Loss = 2.1448e-01, PNorm = 79.1616, GNorm = 0.6773, lr_0 = 8.9913e-04
Loss = 2.0560e-01, PNorm = 79.2990, GNorm = 0.6795, lr_0 = 9.0288e-04
Loss = 1.9587e-01, PNorm = 79.4459, GNorm = 0.9464, lr_0 = 9.0663e-04
Loss = 2.1125e-01, PNorm = 79.5925, GNorm = 1.2500, lr_0 = 9.1038e-04
Loss = 1.7671e-01, PNorm = 79.7380, GNorm = 0.9714, lr_0 = 9.1413e-04
Loss = 1.9138e-01, PNorm = 79.8774, GNorm = 0.7451, lr_0 = 9.1788e-04
Loss = 2.2499e-01, PNorm = 80.0277, GNorm = 0.9149, lr_0 = 9.2163e-04
Loss = 2.1958e-01, PNorm = 80.1897, GNorm = 0.7501, lr_0 = 9.2538e-04
Loss = 2.0096e-01, PNorm = 80.3505, GNorm = 0.8946, lr_0 = 9.2913e-04
Loss = 1.9953e-01, PNorm = 80.4944, GNorm = 1.0649, lr_0 = 9.3288e-04
Loss = 1.8547e-01, PNorm = 80.6332, GNorm = 0.8071, lr_0 = 9.3663e-04
Loss = 2.3303e-01, PNorm = 80.7698, GNorm = 0.7587, lr_0 = 9.4038e-04
Loss = 2.0740e-01, PNorm = 80.9249, GNorm = 0.7686, lr_0 = 9.4413e-04
Loss = 2.2378e-01, PNorm = 81.0733, GNorm = 0.8523, lr_0 = 9.4788e-04
Loss = 2.1124e-01, PNorm = 81.2331, GNorm = 0.8362, lr_0 = 9.5163e-04
Loss = 2.0781e-01, PNorm = 81.3841, GNorm = 1.0772, lr_0 = 9.5538e-04
Loss = 2.0824e-01, PNorm = 81.5475, GNorm = 1.1657, lr_0 = 9.5913e-04
Loss = 2.1331e-01, PNorm = 81.6793, GNorm = 0.7577, lr_0 = 9.6288e-04
Loss = 1.8925e-01, PNorm = 81.8351, GNorm = 0.7062, lr_0 = 9.6663e-04
Loss = 2.1939e-01, PNorm = 81.9841, GNorm = 0.7531, lr_0 = 9.7038e-04
Loss = 1.9947e-01, PNorm = 82.1342, GNorm = 0.9702, lr_0 = 9.7413e-04
Loss = 1.8794e-01, PNorm = 82.2746, GNorm = 0.7750, lr_0 = 9.7788e-04
Loss = 2.0527e-01, PNorm = 82.4173, GNorm = 0.5426, lr_0 = 9.8163e-04
Loss = 2.2096e-01, PNorm = 82.5585, GNorm = 0.8825, lr_0 = 9.8537e-04
Loss = 2.0157e-01, PNorm = 82.7081, GNorm = 1.1606, lr_0 = 9.8912e-04
Loss = 1.9995e-01, PNorm = 82.8605, GNorm = 0.7344, lr_0 = 9.9288e-04
Loss = 2.2068e-01, PNorm = 83.0064, GNorm = 0.8149, lr_0 = 9.9663e-04
Loss = 2.1591e-01, PNorm = 83.1643, GNorm = 0.9216, lr_0 = 9.9993e-04
Validation mae = 0.310705
Epoch 2
Loss = 1.3830e-01, PNorm = 83.2993, GNorm = 0.7090, lr_0 = 9.9925e-04
Loss = 1.2062e-01, PNorm = 83.4265, GNorm = 1.1358, lr_0 = 9.9856e-04
Loss = 1.2959e-01, PNorm = 83.5304, GNorm = 0.7186, lr_0 = 9.9788e-04
Loss = 1.1847e-01, PNorm = 83.6476, GNorm = 0.4798, lr_0 = 9.9719e-04
Loss = 1.2399e-01, PNorm = 83.7496, GNorm = 0.6908, lr_0 = 9.9651e-04
Loss = 1.3847e-01, PNorm = 83.8592, GNorm = 0.7175, lr_0 = 9.9583e-04
Loss = 1.2039e-01, PNorm = 83.9859, GNorm = 0.4829, lr_0 = 9.9515e-04
Loss = 1.3212e-01, PNorm = 84.0969, GNorm = 0.8885, lr_0 = 9.9446e-04
Loss = 1.1483e-01, PNorm = 84.2219, GNorm = 0.5849, lr_0 = 9.9378e-04
Loss = 1.1966e-01, PNorm = 84.3263, GNorm = 0.6141, lr_0 = 9.9310e-04
Loss = 1.3487e-01, PNorm = 84.4389, GNorm = 0.5394, lr_0 = 9.9242e-04
Loss = 1.2761e-01, PNorm = 84.5550, GNorm = 0.7104, lr_0 = 9.9174e-04
Loss = 1.1523e-01, PNorm = 84.6872, GNorm = 1.0776, lr_0 = 9.9106e-04
Loss = 1.1991e-01, PNorm = 84.7944, GNorm = 0.6047, lr_0 = 9.9038e-04
Loss = 1.2545e-01, PNorm = 84.9131, GNorm = 0.5522, lr_0 = 9.8971e-04
Loss = 1.2229e-01, PNorm = 85.0264, GNorm = 0.6027, lr_0 = 9.8903e-04
Loss = 1.1808e-01, PNorm = 85.1426, GNorm = 0.5958, lr_0 = 9.8835e-04
Loss = 1.2756e-01, PNorm = 85.2544, GNorm = 0.5292, lr_0 = 9.8767e-04
Loss = 1.1905e-01, PNorm = 85.3781, GNorm = 0.7030, lr_0 = 9.8700e-04
Loss = 1.2053e-01, PNorm = 85.4816, GNorm = 0.8358, lr_0 = 9.8632e-04
Loss = 1.3241e-01, PNorm = 85.6089, GNorm = 1.0051, lr_0 = 9.8564e-04
Loss = 1.0803e-01, PNorm = 85.7284, GNorm = 0.5444, lr_0 = 9.8497e-04
Loss = 1.2511e-01, PNorm = 85.8348, GNorm = 0.3737, lr_0 = 9.8429e-04
Loss = 1.1981e-01, PNorm = 85.9523, GNorm = 0.7195, lr_0 = 9.8362e-04
Loss = 1.3090e-01, PNorm = 86.0741, GNorm = 0.5195, lr_0 = 9.8295e-04
Loss = 1.2792e-01, PNorm = 86.2100, GNorm = 0.5869, lr_0 = 9.8227e-04
Loss = 1.1936e-01, PNorm = 86.3397, GNorm = 0.6263, lr_0 = 9.8160e-04
Loss = 1.2393e-01, PNorm = 86.4682, GNorm = 0.6386, lr_0 = 9.8093e-04
Loss = 1.1642e-01, PNorm = 86.5940, GNorm = 0.6545, lr_0 = 9.8026e-04
Loss = 1.3447e-01, PNorm = 86.7275, GNorm = 0.5393, lr_0 = 9.7958e-04
Loss = 1.3481e-01, PNorm = 86.8533, GNorm = 0.7314, lr_0 = 9.7891e-04
Loss = 1.5791e-01, PNorm = 86.9877, GNorm = 1.6976, lr_0 = 9.7824e-04
Loss = 1.3650e-01, PNorm = 87.1333, GNorm = 0.6198, lr_0 = 9.7757e-04
Loss = 1.1974e-01, PNorm = 87.2609, GNorm = 0.6702, lr_0 = 9.7690e-04
Loss = 1.3848e-01, PNorm = 87.3784, GNorm = 0.7442, lr_0 = 9.7623e-04
Loss = 1.4758e-01, PNorm = 87.5065, GNorm = 0.7973, lr_0 = 9.7556e-04
Loss = 1.2787e-01, PNorm = 87.6328, GNorm = 0.7806, lr_0 = 9.7490e-04
Loss = 1.1089e-01, PNorm = 87.7485, GNorm = 0.7735, lr_0 = 9.7423e-04
Loss = 1.5017e-01, PNorm = 87.8836, GNorm = 1.2778, lr_0 = 9.7356e-04
Loss = 1.5124e-01, PNorm = 88.0218, GNorm = 0.8600, lr_0 = 9.7289e-04
Loss = 1.2801e-01, PNorm = 88.1650, GNorm = 0.5323, lr_0 = 9.7223e-04
Loss = 1.2900e-01, PNorm = 88.2947, GNorm = 0.5772, lr_0 = 9.7156e-04
Loss = 1.0699e-01, PNorm = 88.4300, GNorm = 0.8081, lr_0 = 9.7090e-04
Loss = 1.3346e-01, PNorm = 88.5451, GNorm = 0.6859, lr_0 = 9.7023e-04
Loss = 1.2933e-01, PNorm = 88.6797, GNorm = 1.0614, lr_0 = 9.6957e-04
Loss = 1.5020e-01, PNorm = 88.8160, GNorm = 0.7066, lr_0 = 9.6890e-04
Loss = 1.3081e-01, PNorm = 88.9639, GNorm = 0.6451, lr_0 = 9.6824e-04
Loss = 1.3361e-01, PNorm = 89.1086, GNorm = 0.9710, lr_0 = 9.6757e-04
Loss = 1.4841e-01, PNorm = 89.2312, GNorm = 0.6178, lr_0 = 9.6691e-04
Loss = 1.2928e-01, PNorm = 89.3628, GNorm = 0.9684, lr_0 = 9.6625e-04
Loss = 1.3487e-01, PNorm = 89.5005, GNorm = 0.5325, lr_0 = 9.6559e-04
Loss = 1.2469e-01, PNorm = 89.6200, GNorm = 1.0007, lr_0 = 9.6493e-04
Loss = 1.4080e-01, PNorm = 89.7429, GNorm = 1.2124, lr_0 = 9.6427e-04
Loss = 1.1883e-01, PNorm = 89.8610, GNorm = 0.8130, lr_0 = 9.6360e-04
Loss = 1.3697e-01, PNorm = 89.9824, GNorm = 0.9377, lr_0 = 9.6294e-04
Loss = 1.4121e-01, PNorm = 90.1128, GNorm = 1.0132, lr_0 = 9.6228e-04
Loss = 1.4013e-01, PNorm = 90.2374, GNorm = 0.7190, lr_0 = 9.6163e-04
Loss = 1.4347e-01, PNorm = 90.3710, GNorm = 0.5371, lr_0 = 9.6097e-04
Loss = 1.3096e-01, PNorm = 90.5082, GNorm = 0.6396, lr_0 = 9.6031e-04
Loss = 1.3909e-01, PNorm = 90.6338, GNorm = 0.7305, lr_0 = 9.5965e-04
Loss = 1.2495e-01, PNorm = 90.7645, GNorm = 0.6274, lr_0 = 9.5899e-04
Loss = 1.2809e-01, PNorm = 90.8837, GNorm = 0.5139, lr_0 = 9.5834e-04
Loss = 1.4811e-01, PNorm = 91.0125, GNorm = 0.5102, lr_0 = 9.5768e-04
Loss = 1.2966e-01, PNorm = 91.1439, GNorm = 0.4905, lr_0 = 9.5702e-04
Loss = 1.3879e-01, PNorm = 91.2848, GNorm = 0.5923, lr_0 = 9.5637e-04
Loss = 1.4976e-01, PNorm = 91.4258, GNorm = 0.8130, lr_0 = 9.5571e-04
Loss = 1.2674e-01, PNorm = 91.5753, GNorm = 0.5429, lr_0 = 9.5506e-04
Loss = 1.4271e-01, PNorm = 91.7093, GNorm = 0.8354, lr_0 = 9.5440e-04
Loss = 1.6655e-01, PNorm = 91.8573, GNorm = 0.5271, lr_0 = 9.5375e-04
Loss = 1.4531e-01, PNorm = 92.0052, GNorm = 0.6250, lr_0 = 9.5310e-04
Loss = 1.3792e-01, PNorm = 92.1569, GNorm = 0.7802, lr_0 = 9.5244e-04
Loss = 1.2956e-01, PNorm = 92.3022, GNorm = 0.8501, lr_0 = 9.5179e-04
Loss = 1.3815e-01, PNorm = 92.4410, GNorm = 0.6952, lr_0 = 9.5114e-04
Loss = 1.5365e-01, PNorm = 92.5730, GNorm = 0.8422, lr_0 = 9.5049e-04
Loss = 1.4454e-01, PNorm = 92.7273, GNorm = 0.7695, lr_0 = 9.4984e-04
Loss = 1.3906e-01, PNorm = 92.8672, GNorm = 0.9170, lr_0 = 9.4919e-04
Loss = 1.2790e-01, PNorm = 93.0098, GNorm = 0.6761, lr_0 = 9.4854e-04
Loss = 1.4363e-01, PNorm = 93.1541, GNorm = 0.5967, lr_0 = 9.4789e-04
Loss = 1.4243e-01, PNorm = 93.3015, GNorm = 0.8699, lr_0 = 9.4724e-04
Loss = 1.5270e-01, PNorm = 93.4430, GNorm = 0.9257, lr_0 = 9.4659e-04
Loss = 1.4115e-01, PNorm = 93.5832, GNorm = 0.5678, lr_0 = 9.4594e-04
Loss = 1.0753e-01, PNorm = 93.7155, GNorm = 0.8025, lr_0 = 9.4529e-04
Loss = 1.4662e-01, PNorm = 93.8398, GNorm = 0.8869, lr_0 = 9.4464e-04
Loss = 1.3099e-01, PNorm = 93.9723, GNorm = 0.6722, lr_0 = 9.4400e-04
Loss = 1.3206e-01, PNorm = 94.0892, GNorm = 1.2321, lr_0 = 9.4335e-04
Loss = 1.5776e-01, PNorm = 94.2135, GNorm = 0.4644, lr_0 = 9.4270e-04
Loss = 1.5356e-01, PNorm = 94.3422, GNorm = 0.6333, lr_0 = 9.4206e-04
Loss = 1.2425e-01, PNorm = 94.4677, GNorm = 0.5696, lr_0 = 9.4141e-04
Loss = 1.3660e-01, PNorm = 94.5923, GNorm = 0.8461, lr_0 = 9.4077e-04
Loss = 1.4531e-01, PNorm = 94.7058, GNorm = 0.8453, lr_0 = 9.4012e-04
Loss = 1.4668e-01, PNorm = 94.8455, GNorm = 1.1013, lr_0 = 9.3948e-04
Loss = 1.4856e-01, PNorm = 94.9747, GNorm = 0.4368, lr_0 = 9.3884e-04
Loss = 1.3844e-01, PNorm = 95.1001, GNorm = 0.6684, lr_0 = 9.3819e-04
Loss = 1.3725e-01, PNorm = 95.2372, GNorm = 0.6330, lr_0 = 9.3755e-04
Loss = 1.3937e-01, PNorm = 95.3535, GNorm = 0.7454, lr_0 = 9.3691e-04
Loss = 1.4945e-01, PNorm = 95.4716, GNorm = 0.7664, lr_0 = 9.3627e-04
Loss = 1.5491e-01, PNorm = 95.5948, GNorm = 1.2937, lr_0 = 9.3562e-04
Loss = 1.3482e-01, PNorm = 95.7220, GNorm = 0.6768, lr_0 = 9.3498e-04
Loss = 1.4899e-01, PNorm = 95.8515, GNorm = 0.5469, lr_0 = 9.3434e-04
Loss = 1.2674e-01, PNorm = 95.9822, GNorm = 0.4931, lr_0 = 9.3370e-04
Loss = 1.4216e-01, PNorm = 96.1076, GNorm = 0.4690, lr_0 = 9.3306e-04
Loss = 1.4750e-01, PNorm = 96.2422, GNorm = 0.5344, lr_0 = 9.3242e-04
Loss = 1.4828e-01, PNorm = 96.3728, GNorm = 1.1548, lr_0 = 9.3178e-04
Loss = 1.6507e-01, PNorm = 96.5261, GNorm = 1.1595, lr_0 = 9.3115e-04
Loss = 1.5223e-01, PNorm = 96.6667, GNorm = 0.5772, lr_0 = 9.3051e-04
Loss = 1.5273e-01, PNorm = 96.8036, GNorm = 0.9645, lr_0 = 9.2987e-04
Loss = 1.6130e-01, PNorm = 96.9411, GNorm = 0.8177, lr_0 = 9.2923e-04
Loss = 1.3802e-01, PNorm = 97.0840, GNorm = 1.0057, lr_0 = 9.2860e-04
Loss = 1.3372e-01, PNorm = 97.2162, GNorm = 0.6117, lr_0 = 9.2796e-04
Loss = 1.5089e-01, PNorm = 97.3513, GNorm = 0.7172, lr_0 = 9.2733e-04
Loss = 1.3928e-01, PNorm = 97.4845, GNorm = 1.1980, lr_0 = 9.2669e-04
Loss = 1.5378e-01, PNorm = 97.6283, GNorm = 0.9053, lr_0 = 9.2606e-04
Loss = 1.5983e-01, PNorm = 97.7790, GNorm = 0.9858, lr_0 = 9.2542e-04
Loss = 1.4308e-01, PNorm = 97.9246, GNorm = 0.6058, lr_0 = 9.2479e-04
Loss = 1.4152e-01, PNorm = 98.0544, GNorm = 0.5272, lr_0 = 9.2415e-04
Loss = 1.4889e-01, PNorm = 98.1813, GNorm = 0.5485, lr_0 = 9.2352e-04
Loss = 1.2640e-01, PNorm = 98.3040, GNorm = 0.5786, lr_0 = 9.2289e-04
Loss = 1.4415e-01, PNorm = 98.4277, GNorm = 0.7023, lr_0 = 9.2226e-04
Loss = 1.5909e-01, PNorm = 98.5510, GNorm = 0.6011, lr_0 = 9.2162e-04
Loss = 1.6142e-01, PNorm = 98.6873, GNorm = 0.9386, lr_0 = 9.2099e-04
Validation mae = 0.298347
Epoch 3
Loss = 9.6720e-02, PNorm = 98.8014, GNorm = 0.6651, lr_0 = 9.2036e-04
Loss = 9.3872e-02, PNorm = 98.8952, GNorm = 0.9356, lr_0 = 9.1973e-04
Loss = 9.6968e-02, PNorm = 98.9884, GNorm = 0.6386, lr_0 = 9.1910e-04
Loss = 1.0459e-01, PNorm = 99.0772, GNorm = 0.7345, lr_0 = 9.1847e-04
Loss = 8.8117e-02, PNorm = 99.1638, GNorm = 1.3216, lr_0 = 9.1784e-04
Loss = 8.9929e-02, PNorm = 99.2355, GNorm = 0.7773, lr_0 = 9.1721e-04
Loss = 7.8021e-02, PNorm = 99.3229, GNorm = 0.4978, lr_0 = 9.1658e-04
Loss = 8.6181e-02, PNorm = 99.3937, GNorm = 0.5029, lr_0 = 9.1596e-04
Loss = 6.5369e-02, PNorm = 99.4682, GNorm = 0.3053, lr_0 = 9.1533e-04
Loss = 6.6601e-02, PNorm = 99.5238, GNorm = 0.4497, lr_0 = 9.1470e-04
Loss = 8.1546e-02, PNorm = 99.5861, GNorm = 0.7040, lr_0 = 9.1408e-04
Loss = 6.8887e-02, PNorm = 99.6522, GNorm = 0.4418, lr_0 = 9.1345e-04
Loss = 7.3078e-02, PNorm = 99.7147, GNorm = 0.3669, lr_0 = 9.1282e-04
Loss = 7.8483e-02, PNorm = 99.7814, GNorm = 0.7691, lr_0 = 9.1220e-04
Loss = 8.3645e-02, PNorm = 99.8555, GNorm = 1.1006, lr_0 = 9.1157e-04
Loss = 8.2155e-02, PNorm = 99.9312, GNorm = 1.2322, lr_0 = 9.1095e-04
Loss = 7.0982e-02, PNorm = 100.0094, GNorm = 0.7384, lr_0 = 9.1032e-04
Loss = 7.6632e-02, PNorm = 100.0788, GNorm = 0.9727, lr_0 = 9.0970e-04
Loss = 8.4030e-02, PNorm = 100.1549, GNorm = 0.4941, lr_0 = 9.0908e-04
Loss = 8.5355e-02, PNorm = 100.2330, GNorm = 0.7415, lr_0 = 9.0846e-04
Loss = 8.3853e-02, PNorm = 100.3146, GNorm = 0.3619, lr_0 = 9.0783e-04
Loss = 8.4265e-02, PNorm = 100.3983, GNorm = 0.6758, lr_0 = 9.0721e-04
Loss = 9.4765e-02, PNorm = 100.4773, GNorm = 0.7690, lr_0 = 9.0659e-04
Loss = 7.7530e-02, PNorm = 100.5634, GNorm = 1.0784, lr_0 = 9.0597e-04
Loss = 7.5237e-02, PNorm = 100.6432, GNorm = 0.5113, lr_0 = 9.0535e-04
Loss = 7.9570e-02, PNorm = 100.7157, GNorm = 0.4333, lr_0 = 9.0473e-04
Loss = 6.9849e-02, PNorm = 100.7967, GNorm = 0.4604, lr_0 = 9.0411e-04
Loss = 7.7230e-02, PNorm = 100.8643, GNorm = 0.3023, lr_0 = 9.0349e-04
Loss = 8.7773e-02, PNorm = 100.9395, GNorm = 0.7806, lr_0 = 9.0287e-04
Loss = 9.7513e-02, PNorm = 101.0116, GNorm = 0.8240, lr_0 = 9.0225e-04
Loss = 8.3641e-02, PNorm = 101.1152, GNorm = 0.9118, lr_0 = 9.0163e-04
Loss = 7.2995e-02, PNorm = 101.1934, GNorm = 0.4251, lr_0 = 9.0102e-04
Loss = 8.9488e-02, PNorm = 101.2833, GNorm = 0.5820, lr_0 = 9.0040e-04
Loss = 8.6194e-02, PNorm = 101.3709, GNorm = 0.3696, lr_0 = 8.9978e-04
Loss = 8.7065e-02, PNorm = 101.4634, GNorm = 1.2335, lr_0 = 8.9916e-04
Loss = 8.1042e-02, PNorm = 101.5451, GNorm = 0.8229, lr_0 = 8.9855e-04
Loss = 8.0672e-02, PNorm = 101.6305, GNorm = 0.4986, lr_0 = 8.9793e-04
Loss = 7.6861e-02, PNorm = 101.7192, GNorm = 0.4672, lr_0 = 8.9732e-04
Loss = 8.9519e-02, PNorm = 101.8004, GNorm = 0.9203, lr_0 = 8.9670e-04
Loss = 8.2780e-02, PNorm = 101.8895, GNorm = 0.6672, lr_0 = 8.9609e-04
Loss = 8.1606e-02, PNorm = 101.9865, GNorm = 0.4864, lr_0 = 8.9548e-04
Loss = 8.4308e-02, PNorm = 102.0682, GNorm = 0.5836, lr_0 = 8.9486e-04
Loss = 8.2044e-02, PNorm = 102.1608, GNorm = 0.7211, lr_0 = 8.9425e-04
Loss = 9.8130e-02, PNorm = 102.2440, GNorm = 0.7107, lr_0 = 8.9364e-04
Loss = 8.3036e-02, PNorm = 102.3401, GNorm = 0.4451, lr_0 = 8.9302e-04
Loss = 8.6646e-02, PNorm = 102.4352, GNorm = 0.4867, lr_0 = 8.9241e-04
Loss = 8.9147e-02, PNorm = 102.5300, GNorm = 0.7194, lr_0 = 8.9180e-04
Loss = 9.3150e-02, PNorm = 102.6164, GNorm = 0.6191, lr_0 = 8.9119e-04
Loss = 8.2962e-02, PNorm = 102.7161, GNorm = 0.3904, lr_0 = 8.9058e-04
Loss = 8.1896e-02, PNorm = 102.8093, GNorm = 0.4332, lr_0 = 8.8997e-04
Loss = 8.1904e-02, PNorm = 102.9084, GNorm = 0.5092, lr_0 = 8.8936e-04
Loss = 8.9840e-02, PNorm = 103.0104, GNorm = 0.8600, lr_0 = 8.8875e-04
Loss = 8.1305e-02, PNorm = 103.1070, GNorm = 1.0968, lr_0 = 8.8814e-04
Loss = 9.3490e-02, PNorm = 103.1901, GNorm = 0.8663, lr_0 = 8.8753e-04
Loss = 1.0355e-01, PNorm = 103.2984, GNorm = 0.6581, lr_0 = 8.8693e-04
Loss = 8.1190e-02, PNorm = 103.3940, GNorm = 0.5707, lr_0 = 8.8632e-04
Loss = 8.7203e-02, PNorm = 103.5026, GNorm = 0.8672, lr_0 = 8.8571e-04
Loss = 7.6158e-02, PNorm = 103.6055, GNorm = 0.5729, lr_0 = 8.8510e-04
Loss = 8.7452e-02, PNorm = 103.6907, GNorm = 0.4751, lr_0 = 8.8450e-04
Loss = 8.0535e-02, PNorm = 103.7791, GNorm = 1.2142, lr_0 = 8.8389e-04
Loss = 8.4924e-02, PNorm = 103.8701, GNorm = 0.3782, lr_0 = 8.8329e-04
Loss = 8.7773e-02, PNorm = 103.9676, GNorm = 0.5349, lr_0 = 8.8268e-04
Loss = 9.0871e-02, PNorm = 104.0700, GNorm = 0.8176, lr_0 = 8.8208e-04
Loss = 9.7241e-02, PNorm = 104.1792, GNorm = 0.6185, lr_0 = 8.8147e-04
Loss = 7.7521e-02, PNorm = 104.2825, GNorm = 0.4551, lr_0 = 8.8087e-04
Loss = 8.4253e-02, PNorm = 104.3785, GNorm = 0.6561, lr_0 = 8.8026e-04
Loss = 8.1035e-02, PNorm = 104.4771, GNorm = 0.4827, lr_0 = 8.7966e-04
Loss = 8.5501e-02, PNorm = 104.5657, GNorm = 0.6130, lr_0 = 8.7906e-04
Loss = 8.8466e-02, PNorm = 104.6636, GNorm = 1.2289, lr_0 = 8.7846e-04
Loss = 9.3871e-02, PNorm = 104.7646, GNorm = 1.0215, lr_0 = 8.7785e-04
Loss = 8.6293e-02, PNorm = 104.8730, GNorm = 0.3266, lr_0 = 8.7725e-04
Loss = 9.3731e-02, PNorm = 104.9755, GNorm = 0.6155, lr_0 = 8.7665e-04
Loss = 8.4746e-02, PNorm = 105.0832, GNorm = 0.6064, lr_0 = 8.7605e-04
Loss = 7.9613e-02, PNorm = 105.1703, GNorm = 0.4093, lr_0 = 8.7545e-04
Loss = 8.9148e-02, PNorm = 105.2720, GNorm = 0.8647, lr_0 = 8.7485e-04
Loss = 9.2864e-02, PNorm = 105.3648, GNorm = 1.2104, lr_0 = 8.7425e-04
Loss = 8.5970e-02, PNorm = 105.4689, GNorm = 0.5506, lr_0 = 8.7365e-04
Loss = 9.5731e-02, PNorm = 105.5686, GNorm = 0.4634, lr_0 = 8.7306e-04
Loss = 8.1888e-02, PNorm = 105.6698, GNorm = 0.6032, lr_0 = 8.7246e-04
Loss = 8.7088e-02, PNorm = 105.7685, GNorm = 0.4201, lr_0 = 8.7186e-04
Loss = 8.7963e-02, PNorm = 105.8689, GNorm = 0.5559, lr_0 = 8.7126e-04
Loss = 8.4875e-02, PNorm = 105.9710, GNorm = 0.8765, lr_0 = 8.7067e-04
Loss = 1.0004e-01, PNorm = 106.0732, GNorm = 0.5797, lr_0 = 8.7007e-04
Loss = 8.7006e-02, PNorm = 106.1668, GNorm = 0.6993, lr_0 = 8.6947e-04
Loss = 8.5164e-02, PNorm = 106.2663, GNorm = 0.4727, lr_0 = 8.6888e-04
Loss = 1.0677e-01, PNorm = 106.3653, GNorm = 1.4425, lr_0 = 8.6828e-04
Loss = 9.2907e-02, PNorm = 106.4653, GNorm = 0.7079, lr_0 = 8.6769e-04
Loss = 9.7176e-02, PNorm = 106.5718, GNorm = 0.4967, lr_0 = 8.6709e-04
Loss = 1.0335e-01, PNorm = 106.6928, GNorm = 0.5811, lr_0 = 8.6650e-04
Loss = 9.6181e-02, PNorm = 106.8056, GNorm = 0.4912, lr_0 = 8.6590e-04
Loss = 1.0140e-01, PNorm = 106.9199, GNorm = 0.7182, lr_0 = 8.6531e-04
Loss = 7.9125e-02, PNorm = 107.0325, GNorm = 0.9043, lr_0 = 8.6472e-04
Loss = 9.3024e-02, PNorm = 107.1441, GNorm = 0.9207, lr_0 = 8.6413e-04
Loss = 8.9586e-02, PNorm = 107.2594, GNorm = 0.4452, lr_0 = 8.6353e-04
Loss = 9.3174e-02, PNorm = 107.3674, GNorm = 0.6747, lr_0 = 8.6294e-04
Loss = 9.0267e-02, PNorm = 107.4772, GNorm = 0.6855, lr_0 = 8.6235e-04
Loss = 9.1291e-02, PNorm = 107.5721, GNorm = 0.4020, lr_0 = 8.6176e-04
Loss = 9.7903e-02, PNorm = 107.6836, GNorm = 0.5007, lr_0 = 8.6117e-04
Loss = 9.0636e-02, PNorm = 107.7994, GNorm = 0.4607, lr_0 = 8.6058e-04
Loss = 8.9905e-02, PNorm = 107.9115, GNorm = 0.6170, lr_0 = 8.5999e-04
Loss = 1.0380e-01, PNorm = 108.0218, GNorm = 0.4901, lr_0 = 8.5940e-04
Loss = 8.2138e-02, PNorm = 108.1328, GNorm = 0.8874, lr_0 = 8.5881e-04
Loss = 1.0221e-01, PNorm = 108.2447, GNorm = 1.2367, lr_0 = 8.5823e-04
Loss = 9.0499e-02, PNorm = 108.3482, GNorm = 0.6281, lr_0 = 8.5764e-04
Loss = 9.3856e-02, PNorm = 108.4545, GNorm = 0.4479, lr_0 = 8.5705e-04
Loss = 9.5617e-02, PNorm = 108.5535, GNorm = 0.4191, lr_0 = 8.5646e-04
Loss = 8.8981e-02, PNorm = 108.6585, GNorm = 0.3740, lr_0 = 8.5588e-04
Loss = 9.4850e-02, PNorm = 108.7745, GNorm = 0.7627, lr_0 = 8.5529e-04
Loss = 9.1957e-02, PNorm = 108.8858, GNorm = 0.6156, lr_0 = 8.5470e-04
Loss = 8.8711e-02, PNorm = 108.9933, GNorm = 0.4484, lr_0 = 8.5412e-04
Loss = 8.9227e-02, PNorm = 109.0995, GNorm = 0.4065, lr_0 = 8.5353e-04
Loss = 8.9615e-02, PNorm = 109.1946, GNorm = 0.4558, lr_0 = 8.5295e-04
Loss = 8.8062e-02, PNorm = 109.2911, GNorm = 0.9094, lr_0 = 8.5236e-04
Loss = 1.0273e-01, PNorm = 109.3952, GNorm = 0.8037, lr_0 = 8.5178e-04
Loss = 9.9570e-02, PNorm = 109.5133, GNorm = 0.4600, lr_0 = 8.5120e-04
Loss = 9.0715e-02, PNorm = 109.6335, GNorm = 0.6045, lr_0 = 8.5061e-04
Loss = 9.1287e-02, PNorm = 109.7470, GNorm = 0.4426, lr_0 = 8.5003e-04
Loss = 1.0246e-01, PNorm = 109.8454, GNorm = 0.9721, lr_0 = 8.4945e-04
Loss = 1.0250e-01, PNorm = 109.9586, GNorm = 0.6845, lr_0 = 8.4887e-04
Loss = 9.4342e-02, PNorm = 110.0678, GNorm = 0.4553, lr_0 = 8.4828e-04
Validation mae = 0.291817
Epoch 4
Loss = 6.5518e-02, PNorm = 110.1564, GNorm = 0.4983, lr_0 = 8.4770e-04
Loss = 6.0642e-02, PNorm = 110.2349, GNorm = 0.6098, lr_0 = 8.4712e-04
Loss = 4.9698e-02, PNorm = 110.2958, GNorm = 0.8715, lr_0 = 8.4654e-04
Loss = 5.9858e-02, PNorm = 110.3615, GNorm = 0.3816, lr_0 = 8.4596e-04
Loss = 5.4372e-02, PNorm = 110.4204, GNorm = 0.4776, lr_0 = 8.4538e-04
Loss = 5.1118e-02, PNorm = 110.4912, GNorm = 0.3495, lr_0 = 8.4480e-04
Loss = 5.1347e-02, PNorm = 110.5508, GNorm = 0.6623, lr_0 = 8.4423e-04
Loss = 4.9974e-02, PNorm = 110.6169, GNorm = 0.3095, lr_0 = 8.4365e-04
Loss = 6.2672e-02, PNorm = 110.6724, GNorm = 0.7381, lr_0 = 8.4307e-04
Loss = 5.4737e-02, PNorm = 110.7376, GNorm = 0.4354, lr_0 = 8.4249e-04
Loss = 5.7721e-02, PNorm = 110.7934, GNorm = 1.0489, lr_0 = 8.4191e-04
Loss = 5.4050e-02, PNorm = 110.8594, GNorm = 0.4289, lr_0 = 8.4134e-04
Loss = 5.6290e-02, PNorm = 110.9221, GNorm = 0.2720, lr_0 = 8.4076e-04
Loss = 6.0288e-02, PNorm = 110.9866, GNorm = 0.3813, lr_0 = 8.4019e-04
Loss = 4.9357e-02, PNorm = 111.0462, GNorm = 0.4538, lr_0 = 8.3961e-04
Loss = 5.3149e-02, PNorm = 111.1115, GNorm = 0.8039, lr_0 = 8.3903e-04
Loss = 5.4066e-02, PNorm = 111.1685, GNorm = 0.3879, lr_0 = 8.3846e-04
Loss = 5.9496e-02, PNorm = 111.2392, GNorm = 0.8388, lr_0 = 8.3789e-04
Loss = 5.5463e-02, PNorm = 111.2958, GNorm = 0.3130, lr_0 = 8.3731e-04
Loss = 5.5230e-02, PNorm = 111.3639, GNorm = 0.6055, lr_0 = 8.3674e-04
Loss = 5.5198e-02, PNorm = 111.4158, GNorm = 0.7690, lr_0 = 8.3616e-04
Loss = 5.2246e-02, PNorm = 111.4821, GNorm = 0.3445, lr_0 = 8.3559e-04
Loss = 5.7951e-02, PNorm = 111.5524, GNorm = 0.5314, lr_0 = 8.3502e-04
Loss = 5.6529e-02, PNorm = 111.6230, GNorm = 0.3768, lr_0 = 8.3445e-04
Loss = 5.7309e-02, PNorm = 111.7024, GNorm = 1.0209, lr_0 = 8.3388e-04
Loss = 4.9725e-02, PNorm = 111.7681, GNorm = 0.3262, lr_0 = 8.3330e-04
Loss = 4.9958e-02, PNorm = 111.8382, GNorm = 0.2049, lr_0 = 8.3273e-04
Loss = 4.9077e-02, PNorm = 111.8949, GNorm = 0.8326, lr_0 = 8.3216e-04
Loss = 5.7911e-02, PNorm = 111.9739, GNorm = 0.3953, lr_0 = 8.3159e-04
Loss = 4.7012e-02, PNorm = 112.0430, GNorm = 0.3807, lr_0 = 8.3102e-04
Loss = 5.5423e-02, PNorm = 112.1168, GNorm = 0.4056, lr_0 = 8.3045e-04
Loss = 6.1769e-02, PNorm = 112.1860, GNorm = 0.5192, lr_0 = 8.2988e-04
Loss = 5.3728e-02, PNorm = 112.2577, GNorm = 0.2957, lr_0 = 8.2932e-04
Loss = 4.8977e-02, PNorm = 112.3282, GNorm = 0.5971, lr_0 = 8.2875e-04
Loss = 5.4341e-02, PNorm = 112.3968, GNorm = 0.3651, lr_0 = 8.2818e-04
Loss = 4.8706e-02, PNorm = 112.4682, GNorm = 0.4755, lr_0 = 8.2761e-04
Loss = 6.6757e-02, PNorm = 112.5452, GNorm = 0.5207, lr_0 = 8.2705e-04
Loss = 5.4897e-02, PNorm = 112.6292, GNorm = 0.3344, lr_0 = 8.2648e-04
Loss = 5.8284e-02, PNorm = 112.7033, GNorm = 0.5426, lr_0 = 8.2591e-04
Loss = 5.6775e-02, PNorm = 112.7774, GNorm = 0.2609, lr_0 = 8.2535e-04
Loss = 5.7051e-02, PNorm = 112.8500, GNorm = 0.4600, lr_0 = 8.2478e-04
Loss = 5.8872e-02, PNorm = 112.9207, GNorm = 0.4314, lr_0 = 8.2422e-04
Loss = 6.5000e-02, PNorm = 112.9979, GNorm = 0.7333, lr_0 = 8.2365e-04
Loss = 5.6970e-02, PNorm = 113.0766, GNorm = 0.4715, lr_0 = 8.2309e-04
Loss = 5.5359e-02, PNorm = 113.1544, GNorm = 0.6009, lr_0 = 8.2252e-04
Loss = 6.3940e-02, PNorm = 113.2293, GNorm = 0.4833, lr_0 = 8.2196e-04
Loss = 6.1263e-02, PNorm = 113.3122, GNorm = 0.3536, lr_0 = 8.2140e-04
Loss = 5.3984e-02, PNorm = 113.4005, GNorm = 0.3592, lr_0 = 8.2084e-04
Loss = 5.9722e-02, PNorm = 113.4771, GNorm = 0.4567, lr_0 = 8.2027e-04
Loss = 5.5588e-02, PNorm = 113.5608, GNorm = 0.5687, lr_0 = 8.1971e-04
Loss = 6.2649e-02, PNorm = 113.6339, GNorm = 0.7047, lr_0 = 8.1915e-04
Loss = 5.8644e-02, PNorm = 113.7244, GNorm = 0.6247, lr_0 = 8.1859e-04
Loss = 5.9174e-02, PNorm = 113.8101, GNorm = 0.5462, lr_0 = 8.1803e-04
Loss = 7.0490e-02, PNorm = 113.8885, GNorm = 0.5586, lr_0 = 8.1747e-04
Loss = 6.2693e-02, PNorm = 113.9762, GNorm = 0.7526, lr_0 = 8.1691e-04
Loss = 5.7030e-02, PNorm = 114.0574, GNorm = 0.5288, lr_0 = 8.1635e-04
Loss = 5.5823e-02, PNorm = 114.1360, GNorm = 0.3155, lr_0 = 8.1579e-04
Loss = 6.0779e-02, PNorm = 114.2144, GNorm = 0.4743, lr_0 = 8.1523e-04
Loss = 5.8619e-02, PNorm = 114.2878, GNorm = 0.3914, lr_0 = 8.1467e-04
Loss = 6.0564e-02, PNorm = 114.3660, GNorm = 1.0482, lr_0 = 8.1411e-04
Loss = 5.6969e-02, PNorm = 114.4342, GNorm = 0.4856, lr_0 = 8.1355e-04
Loss = 6.0573e-02, PNorm = 114.5099, GNorm = 0.7355, lr_0 = 8.1300e-04
Loss = 6.0968e-02, PNorm = 114.5794, GNorm = 0.3551, lr_0 = 8.1244e-04
Loss = 5.6608e-02, PNorm = 114.6648, GNorm = 0.7243, lr_0 = 8.1188e-04
Loss = 5.8553e-02, PNorm = 114.7336, GNorm = 0.4769, lr_0 = 8.1133e-04
Loss = 6.3288e-02, PNorm = 114.8264, GNorm = 0.7961, lr_0 = 8.1077e-04
Loss = 6.2058e-02, PNorm = 114.9119, GNorm = 0.5225, lr_0 = 8.1022e-04
Loss = 5.6433e-02, PNorm = 115.0139, GNorm = 0.3036, lr_0 = 8.0966e-04
Loss = 5.8080e-02, PNorm = 115.0891, GNorm = 0.4346, lr_0 = 8.0911e-04
Loss = 5.7744e-02, PNorm = 115.1596, GNorm = 0.4110, lr_0 = 8.0855e-04
Loss = 6.5697e-02, PNorm = 115.2431, GNorm = 0.5799, lr_0 = 8.0800e-04
Loss = 5.5715e-02, PNorm = 115.3245, GNorm = 0.6292, lr_0 = 8.0745e-04
Loss = 6.0689e-02, PNorm = 115.4123, GNorm = 0.4353, lr_0 = 8.0689e-04
Loss = 7.2983e-02, PNorm = 115.4974, GNorm = 0.6572, lr_0 = 8.0634e-04
Loss = 6.7239e-02, PNorm = 115.5918, GNorm = 0.6360, lr_0 = 8.0579e-04
Loss = 5.8300e-02, PNorm = 115.6865, GNorm = 0.6719, lr_0 = 8.0523e-04
Loss = 6.4715e-02, PNorm = 115.7679, GNorm = 0.5134, lr_0 = 8.0468e-04
Loss = 6.2493e-02, PNorm = 115.8603, GNorm = 0.9385, lr_0 = 8.0413e-04
Loss = 6.3642e-02, PNorm = 115.9505, GNorm = 0.7093, lr_0 = 8.0358e-04
Loss = 6.1839e-02, PNorm = 116.0414, GNorm = 0.4506, lr_0 = 8.0303e-04
Loss = 6.7356e-02, PNorm = 116.1331, GNorm = 0.5920, lr_0 = 8.0248e-04
Loss = 6.3190e-02, PNorm = 116.2230, GNorm = 0.8132, lr_0 = 8.0193e-04
Loss = 6.1267e-02, PNorm = 116.3090, GNorm = 0.5191, lr_0 = 8.0138e-04
Loss = 6.6727e-02, PNorm = 116.3952, GNorm = 0.3776, lr_0 = 8.0083e-04
Loss = 5.2284e-02, PNorm = 116.4894, GNorm = 0.2989, lr_0 = 8.0028e-04
Loss = 5.9547e-02, PNorm = 116.5760, GNorm = 0.6877, lr_0 = 7.9974e-04
Loss = 5.5791e-02, PNorm = 116.6586, GNorm = 0.2456, lr_0 = 7.9919e-04
Loss = 6.7549e-02, PNorm = 116.7336, GNorm = 0.3908, lr_0 = 7.9864e-04
Loss = 5.4608e-02, PNorm = 116.8185, GNorm = 0.6468, lr_0 = 7.9809e-04
Loss = 6.9070e-02, PNorm = 116.9038, GNorm = 0.5046, lr_0 = 7.9755e-04
Loss = 7.0998e-02, PNorm = 116.9969, GNorm = 0.7298, lr_0 = 7.9700e-04
Loss = 7.0160e-02, PNorm = 117.0907, GNorm = 0.6876, lr_0 = 7.9645e-04
Loss = 6.4237e-02, PNorm = 117.1888, GNorm = 0.7588, lr_0 = 7.9591e-04
Loss = 6.8635e-02, PNorm = 117.2847, GNorm = 0.8346, lr_0 = 7.9536e-04
Loss = 6.2279e-02, PNorm = 117.3859, GNorm = 0.3530, lr_0 = 7.9482e-04
Loss = 6.5249e-02, PNorm = 117.4869, GNorm = 0.9534, lr_0 = 7.9427e-04
Loss = 5.6854e-02, PNorm = 117.5772, GNorm = 0.5429, lr_0 = 7.9373e-04
Loss = 6.8028e-02, PNorm = 117.6710, GNorm = 0.7279, lr_0 = 7.9319e-04
Loss = 6.9900e-02, PNorm = 117.7659, GNorm = 0.3480, lr_0 = 7.9264e-04
Loss = 7.5030e-02, PNorm = 117.8607, GNorm = 0.7702, lr_0 = 7.9210e-04
Loss = 7.2584e-02, PNorm = 117.9682, GNorm = 0.8042, lr_0 = 7.9156e-04
Loss = 7.0471e-02, PNorm = 118.0664, GNorm = 0.6719, lr_0 = 7.9101e-04
Loss = 6.3053e-02, PNorm = 118.1684, GNorm = 0.8744, lr_0 = 7.9047e-04
Loss = 6.2103e-02, PNorm = 118.2641, GNorm = 0.3620, lr_0 = 7.8993e-04
Loss = 6.1298e-02, PNorm = 118.3470, GNorm = 0.5474, lr_0 = 7.8939e-04
Loss = 5.6336e-02, PNorm = 118.4368, GNorm = 0.3404, lr_0 = 7.8885e-04
Loss = 6.3023e-02, PNorm = 118.5311, GNorm = 0.4877, lr_0 = 7.8831e-04
Loss = 7.2380e-02, PNorm = 118.6295, GNorm = 0.7520, lr_0 = 7.8777e-04
Loss = 6.4893e-02, PNorm = 118.7267, GNorm = 0.7513, lr_0 = 7.8723e-04
Loss = 6.9479e-02, PNorm = 118.8286, GNorm = 0.6514, lr_0 = 7.8669e-04
Loss = 7.7016e-02, PNorm = 118.9251, GNorm = 0.8042, lr_0 = 7.8615e-04
Loss = 6.4175e-02, PNorm = 119.0293, GNorm = 0.9637, lr_0 = 7.8561e-04
Loss = 7.2783e-02, PNorm = 119.1239, GNorm = 0.6466, lr_0 = 7.8507e-04
Loss = 7.3787e-02, PNorm = 119.2221, GNorm = 0.4369, lr_0 = 7.8454e-04
Loss = 6.5846e-02, PNorm = 119.3224, GNorm = 0.7004, lr_0 = 7.8400e-04
Loss = 6.4042e-02, PNorm = 119.4252, GNorm = 0.4618, lr_0 = 7.8346e-04
Loss = 6.6731e-02, PNorm = 119.5126, GNorm = 0.4803, lr_0 = 7.8293e-04
Loss = 6.9630e-02, PNorm = 119.6043, GNorm = 0.4522, lr_0 = 7.8239e-04
Loss = 6.3840e-02, PNorm = 119.6953, GNorm = 0.5332, lr_0 = 7.8185e-04
Loss = 7.1744e-02, PNorm = 119.7923, GNorm = 0.9773, lr_0 = 7.8132e-04
Validation mae = 0.291803
Epoch 5
Loss = 4.3430e-02, PNorm = 119.8756, GNorm = 0.3722, lr_0 = 7.8078e-04
Loss = 4.1692e-02, PNorm = 119.9490, GNorm = 0.8888, lr_0 = 7.8025e-04
Loss = 4.1316e-02, PNorm = 120.0054, GNorm = 0.3408, lr_0 = 7.7971e-04
Loss = 4.6111e-02, PNorm = 120.0638, GNorm = 0.3130, lr_0 = 7.7918e-04
Loss = 4.1308e-02, PNorm = 120.1178, GNorm = 0.5238, lr_0 = 7.7864e-04
Loss = 4.3621e-02, PNorm = 120.1779, GNorm = 0.8010, lr_0 = 7.7811e-04
Loss = 4.1919e-02, PNorm = 120.2310, GNorm = 0.4251, lr_0 = 7.7758e-04
Loss = 4.4197e-02, PNorm = 120.2883, GNorm = 0.5391, lr_0 = 7.7705e-04
Loss = 4.7843e-02, PNorm = 120.3425, GNorm = 0.4642, lr_0 = 7.7651e-04
Loss = 4.5543e-02, PNorm = 120.3994, GNorm = 0.5336, lr_0 = 7.7598e-04
Loss = 4.1611e-02, PNorm = 120.4648, GNorm = 0.4464, lr_0 = 7.7545e-04
Loss = 4.9558e-02, PNorm = 120.5151, GNorm = 1.0073, lr_0 = 7.7492e-04
Loss = 4.4521e-02, PNorm = 120.5855, GNorm = 0.6620, lr_0 = 7.7439e-04
Loss = 4.2773e-02, PNorm = 120.6457, GNorm = 0.2624, lr_0 = 7.7386e-04
Loss = 4.4542e-02, PNorm = 120.7031, GNorm = 0.5759, lr_0 = 7.7333e-04
Loss = 3.8956e-02, PNorm = 120.7601, GNorm = 0.2823, lr_0 = 7.7280e-04
Loss = 4.7362e-02, PNorm = 120.8169, GNorm = 0.5162, lr_0 = 7.7227e-04
Loss = 3.9454e-02, PNorm = 120.8715, GNorm = 0.3126, lr_0 = 7.7174e-04
Loss = 3.7856e-02, PNorm = 120.9300, GNorm = 0.5382, lr_0 = 7.7121e-04
Loss = 3.6560e-02, PNorm = 120.9894, GNorm = 0.2736, lr_0 = 7.7068e-04
Loss = 4.4179e-02, PNorm = 121.0478, GNorm = 0.5573, lr_0 = 7.7015e-04
Loss = 4.1584e-02, PNorm = 121.1100, GNorm = 0.2776, lr_0 = 7.6963e-04
Loss = 4.8252e-02, PNorm = 121.1747, GNorm = 0.8356, lr_0 = 7.6910e-04
Loss = 4.7144e-02, PNorm = 121.2395, GNorm = 0.6824, lr_0 = 7.6857e-04
Loss = 3.9025e-02, PNorm = 121.2990, GNorm = 0.4709, lr_0 = 7.6805e-04
Loss = 4.2396e-02, PNorm = 121.3638, GNorm = 0.3577, lr_0 = 7.6752e-04
Loss = 4.1825e-02, PNorm = 121.4200, GNorm = 0.7888, lr_0 = 7.6699e-04
Loss = 4.1230e-02, PNorm = 121.4870, GNorm = 0.7075, lr_0 = 7.6647e-04
Loss = 3.8815e-02, PNorm = 121.5473, GNorm = 0.4695, lr_0 = 7.6594e-04
Loss = 4.1226e-02, PNorm = 121.6107, GNorm = 0.5794, lr_0 = 7.6542e-04
Loss = 3.7791e-02, PNorm = 121.6641, GNorm = 0.2684, lr_0 = 7.6489e-04
Loss = 4.4448e-02, PNorm = 121.7274, GNorm = 0.4923, lr_0 = 7.6437e-04
Loss = 4.3114e-02, PNorm = 121.7958, GNorm = 0.3883, lr_0 = 7.6385e-04
Loss = 3.7317e-02, PNorm = 121.8574, GNorm = 0.4550, lr_0 = 7.6332e-04
Loss = 4.3092e-02, PNorm = 121.9211, GNorm = 0.3874, lr_0 = 7.6280e-04
Loss = 3.7659e-02, PNorm = 121.9799, GNorm = 0.2579, lr_0 = 7.6228e-04
Loss = 4.0348e-02, PNorm = 122.0461, GNorm = 0.3169, lr_0 = 7.6176e-04
Loss = 4.0540e-02, PNorm = 122.1088, GNorm = 0.4415, lr_0 = 7.6123e-04
Loss = 4.1826e-02, PNorm = 122.1745, GNorm = 0.4973, lr_0 = 7.6071e-04
Loss = 4.2495e-02, PNorm = 122.2333, GNorm = 0.4385, lr_0 = 7.6019e-04
Loss = 4.3146e-02, PNorm = 122.3078, GNorm = 0.7304, lr_0 = 7.5967e-04
Loss = 4.0280e-02, PNorm = 122.3721, GNorm = 0.2892, lr_0 = 7.5915e-04
Loss = 4.2060e-02, PNorm = 122.4402, GNorm = 0.4050, lr_0 = 7.5863e-04
Loss = 4.6930e-02, PNorm = 122.4994, GNorm = 0.3239, lr_0 = 7.5811e-04
Loss = 4.5364e-02, PNorm = 122.5601, GNorm = 0.4030, lr_0 = 7.5759e-04
Loss = 4.3567e-02, PNorm = 122.6232, GNorm = 0.6076, lr_0 = 7.5707e-04
Loss = 4.3435e-02, PNorm = 122.6843, GNorm = 0.2644, lr_0 = 7.5655e-04
Loss = 3.8437e-02, PNorm = 122.7560, GNorm = 0.6340, lr_0 = 7.5603e-04
Loss = 4.0696e-02, PNorm = 122.8109, GNorm = 0.8011, lr_0 = 7.5552e-04
Loss = 4.2543e-02, PNorm = 122.8756, GNorm = 0.3536, lr_0 = 7.5500e-04
Loss = 4.2611e-02, PNorm = 122.9376, GNorm = 0.5889, lr_0 = 7.5448e-04
Loss = 5.2173e-02, PNorm = 123.0069, GNorm = 0.8237, lr_0 = 7.5397e-04
Loss = 3.8878e-02, PNorm = 123.0733, GNorm = 0.7850, lr_0 = 7.5345e-04
Loss = 3.8783e-02, PNorm = 123.1313, GNorm = 0.5758, lr_0 = 7.5293e-04
Loss = 4.1082e-02, PNorm = 123.1956, GNorm = 0.2949, lr_0 = 7.5242e-04
Loss = 4.2983e-02, PNorm = 123.2632, GNorm = 0.3212, lr_0 = 7.5190e-04
Loss = 4.2739e-02, PNorm = 123.3351, GNorm = 0.3469, lr_0 = 7.5139e-04
Loss = 3.9309e-02, PNorm = 123.4036, GNorm = 0.4322, lr_0 = 7.5087e-04
Loss = 4.2042e-02, PNorm = 123.4718, GNorm = 0.8355, lr_0 = 7.5036e-04
Loss = 4.4005e-02, PNorm = 123.5397, GNorm = 0.6784, lr_0 = 7.4984e-04
Loss = 3.7249e-02, PNorm = 123.6058, GNorm = 0.2034, lr_0 = 7.4933e-04
Loss = 4.1569e-02, PNorm = 123.6726, GNorm = 0.6177, lr_0 = 7.4882e-04
Loss = 4.3907e-02, PNorm = 123.7462, GNorm = 0.2954, lr_0 = 7.4830e-04
Loss = 4.7468e-02, PNorm = 123.8123, GNorm = 1.0478, lr_0 = 7.4779e-04
Loss = 4.9722e-02, PNorm = 123.8868, GNorm = 0.7688, lr_0 = 7.4728e-04
Loss = 4.9144e-02, PNorm = 123.9631, GNorm = 1.2139, lr_0 = 7.4677e-04
Loss = 4.3909e-02, PNorm = 124.0361, GNorm = 0.5995, lr_0 = 7.4625e-04
Loss = 4.4287e-02, PNorm = 124.1038, GNorm = 0.8141, lr_0 = 7.4574e-04
Loss = 5.3181e-02, PNorm = 124.1710, GNorm = 0.4974, lr_0 = 7.4523e-04
Loss = 4.9125e-02, PNorm = 124.2403, GNorm = 0.7078, lr_0 = 7.4472e-04
Loss = 4.3120e-02, PNorm = 124.3130, GNorm = 0.4423, lr_0 = 7.4421e-04
Loss = 4.3931e-02, PNorm = 124.3922, GNorm = 0.3348, lr_0 = 7.4370e-04
Loss = 3.9765e-02, PNorm = 124.4579, GNorm = 0.5341, lr_0 = 7.4319e-04
Loss = 4.1454e-02, PNorm = 124.5348, GNorm = 0.6642, lr_0 = 7.4268e-04
Loss = 4.7342e-02, PNorm = 124.6001, GNorm = 0.5367, lr_0 = 7.4217e-04
Loss = 4.5643e-02, PNorm = 124.6847, GNorm = 0.9508, lr_0 = 7.4167e-04
Loss = 4.4269e-02, PNorm = 124.7526, GNorm = 0.7366, lr_0 = 7.4116e-04
Loss = 4.8006e-02, PNorm = 124.8269, GNorm = 1.1518, lr_0 = 7.4065e-04
Loss = 4.1305e-02, PNorm = 124.8901, GNorm = 0.3281, lr_0 = 7.4014e-04
Loss = 4.2184e-02, PNorm = 124.9660, GNorm = 0.5282, lr_0 = 7.3964e-04
Loss = 4.4999e-02, PNorm = 125.0360, GNorm = 0.4768, lr_0 = 7.3913e-04
Loss = 4.2549e-02, PNorm = 125.1163, GNorm = 0.3355, lr_0 = 7.3862e-04
Loss = 4.9930e-02, PNorm = 125.1850, GNorm = 0.4885, lr_0 = 7.3812e-04
Loss = 5.3850e-02, PNorm = 125.2639, GNorm = 0.4102, lr_0 = 7.3761e-04
Loss = 4.3140e-02, PNorm = 125.3421, GNorm = 0.4295, lr_0 = 7.3711e-04
Loss = 5.1956e-02, PNorm = 125.4337, GNorm = 0.4560, lr_0 = 7.3660e-04
Loss = 4.8369e-02, PNorm = 125.5092, GNorm = 0.4365, lr_0 = 7.3610e-04
Loss = 4.5539e-02, PNorm = 125.5964, GNorm = 0.5943, lr_0 = 7.3559e-04
Loss = 4.3217e-02, PNorm = 125.6686, GNorm = 0.4756, lr_0 = 7.3509e-04
Loss = 4.9790e-02, PNorm = 125.7444, GNorm = 0.2761, lr_0 = 7.3458e-04
Loss = 4.4736e-02, PNorm = 125.8185, GNorm = 0.7876, lr_0 = 7.3408e-04
Loss = 4.1274e-02, PNorm = 125.8969, GNorm = 0.2115, lr_0 = 7.3358e-04
Loss = 4.1809e-02, PNorm = 125.9778, GNorm = 0.7600, lr_0 = 7.3308e-04
Loss = 4.5300e-02, PNorm = 126.0451, GNorm = 0.3069, lr_0 = 7.3257e-04
Loss = 5.0203e-02, PNorm = 126.1300, GNorm = 0.2958, lr_0 = 7.3207e-04
Loss = 5.0851e-02, PNorm = 126.2076, GNorm = 0.8417, lr_0 = 7.3157e-04
Loss = 4.7602e-02, PNorm = 126.2952, GNorm = 0.4434, lr_0 = 7.3107e-04
Loss = 4.2066e-02, PNorm = 126.3792, GNorm = 0.2759, lr_0 = 7.3057e-04
Loss = 4.5100e-02, PNorm = 126.4536, GNorm = 0.8112, lr_0 = 7.3007e-04
Loss = 3.9024e-02, PNorm = 126.5311, GNorm = 0.4418, lr_0 = 7.2957e-04
Loss = 4.0148e-02, PNorm = 126.5987, GNorm = 1.2857, lr_0 = 7.2907e-04
Loss = 4.8807e-02, PNorm = 126.6788, GNorm = 0.3364, lr_0 = 7.2857e-04
Loss = 4.3431e-02, PNorm = 126.7620, GNorm = 0.6231, lr_0 = 7.2807e-04
Loss = 4.4946e-02, PNorm = 126.8357, GNorm = 0.3918, lr_0 = 7.2757e-04
Loss = 4.8940e-02, PNorm = 126.9109, GNorm = 0.5018, lr_0 = 7.2707e-04
Loss = 4.5669e-02, PNorm = 126.9904, GNorm = 0.7549, lr_0 = 7.2657e-04
Loss = 4.0344e-02, PNorm = 127.0634, GNorm = 0.3067, lr_0 = 7.2608e-04
Loss = 4.9797e-02, PNorm = 127.1412, GNorm = 0.4335, lr_0 = 7.2558e-04
Loss = 4.8260e-02, PNorm = 127.2124, GNorm = 0.7524, lr_0 = 7.2508e-04
Loss = 5.2182e-02, PNorm = 127.2936, GNorm = 0.5884, lr_0 = 7.2458e-04
Loss = 4.7078e-02, PNorm = 127.3726, GNorm = 0.5218, lr_0 = 7.2409e-04
Loss = 4.8357e-02, PNorm = 127.4536, GNorm = 0.3520, lr_0 = 7.2359e-04
Loss = 5.1640e-02, PNorm = 127.5401, GNorm = 0.6947, lr_0 = 7.2310e-04
Loss = 5.5663e-02, PNorm = 127.6349, GNorm = 0.7940, lr_0 = 7.2260e-04
Loss = 4.5730e-02, PNorm = 127.7273, GNorm = 0.4578, lr_0 = 7.2211e-04
Loss = 5.3192e-02, PNorm = 127.8142, GNorm = 0.5895, lr_0 = 7.2161e-04
Loss = 4.8619e-02, PNorm = 127.8984, GNorm = 0.5284, lr_0 = 7.2112e-04
Loss = 5.4544e-02, PNorm = 127.9851, GNorm = 0.2976, lr_0 = 7.2062e-04
Loss = 4.7539e-02, PNorm = 128.0779, GNorm = 0.8245, lr_0 = 7.2013e-04
Loss = 5.1778e-02, PNorm = 128.1604, GNorm = 0.4543, lr_0 = 7.1964e-04
Validation mae = 0.288390
Epoch 6
Loss = 3.4615e-02, PNorm = 128.2325, GNorm = 0.6721, lr_0 = 7.1914e-04
Loss = 3.8212e-02, PNorm = 128.2872, GNorm = 0.5629, lr_0 = 7.1865e-04
Loss = 3.3431e-02, PNorm = 128.3387, GNorm = 0.4428, lr_0 = 7.1816e-04
Loss = 3.6924e-02, PNorm = 128.3928, GNorm = 0.4344, lr_0 = 7.1767e-04
Loss = 3.6956e-02, PNorm = 128.4461, GNorm = 0.7457, lr_0 = 7.1717e-04
Loss = 3.8586e-02, PNorm = 128.5014, GNorm = 0.6112, lr_0 = 7.1668e-04
Loss = 3.5692e-02, PNorm = 128.5513, GNorm = 0.5068, lr_0 = 7.1619e-04
Loss = 3.9295e-02, PNorm = 128.6055, GNorm = 0.3185, lr_0 = 7.1570e-04
Loss = 3.1616e-02, PNorm = 128.6558, GNorm = 0.2694, lr_0 = 7.1521e-04
Loss = 3.4259e-02, PNorm = 128.7047, GNorm = 0.2791, lr_0 = 7.1472e-04
Loss = 3.3036e-02, PNorm = 128.7570, GNorm = 0.6893, lr_0 = 7.1423e-04
Loss = 3.4498e-02, PNorm = 128.8122, GNorm = 0.5135, lr_0 = 7.1374e-04
Loss = 3.1594e-02, PNorm = 128.8677, GNorm = 0.6540, lr_0 = 7.1325e-04
Loss = 3.1908e-02, PNorm = 128.9173, GNorm = 0.2672, lr_0 = 7.1277e-04
Loss = 3.6328e-02, PNorm = 128.9611, GNorm = 0.3806, lr_0 = 7.1228e-04
Loss = 2.9464e-02, PNorm = 129.0078, GNorm = 0.3130, lr_0 = 7.1179e-04
Loss = 2.9903e-02, PNorm = 129.0660, GNorm = 0.2015, lr_0 = 7.1130e-04
Loss = 3.9156e-02, PNorm = 129.1185, GNorm = 0.3171, lr_0 = 7.1081e-04
Loss = 3.5070e-02, PNorm = 129.1771, GNorm = 0.4419, lr_0 = 7.1033e-04
Loss = 2.8683e-02, PNorm = 129.2289, GNorm = 0.6183, lr_0 = 7.0984e-04
Loss = 2.8517e-02, PNorm = 129.2756, GNorm = 0.6062, lr_0 = 7.0935e-04
Loss = 2.5232e-02, PNorm = 129.3256, GNorm = 0.1862, lr_0 = 7.0887e-04
Loss = 3.1306e-02, PNorm = 129.3690, GNorm = 0.4314, lr_0 = 7.0838e-04
Loss = 3.2773e-02, PNorm = 129.4216, GNorm = 0.2497, lr_0 = 7.0790e-04
Loss = 3.3328e-02, PNorm = 129.4733, GNorm = 0.5263, lr_0 = 7.0741e-04
Loss = 3.0033e-02, PNorm = 129.5247, GNorm = 0.2159, lr_0 = 7.0693e-04
Loss = 3.1590e-02, PNorm = 129.5748, GNorm = 0.3347, lr_0 = 7.0644e-04
Loss = 4.0356e-02, PNorm = 129.6213, GNorm = 0.4303, lr_0 = 7.0596e-04
Loss = 3.3403e-02, PNorm = 129.6748, GNorm = 0.6332, lr_0 = 7.0548e-04
Loss = 3.0294e-02, PNorm = 129.7286, GNorm = 0.2317, lr_0 = 7.0499e-04
Loss = 3.2012e-02, PNorm = 129.7730, GNorm = 0.6442, lr_0 = 7.0451e-04
Loss = 2.9320e-02, PNorm = 129.8363, GNorm = 0.3545, lr_0 = 7.0403e-04
Loss = 2.9088e-02, PNorm = 129.8864, GNorm = 0.5267, lr_0 = 7.0354e-04
Loss = 3.3185e-02, PNorm = 129.9380, GNorm = 0.3432, lr_0 = 7.0306e-04
Loss = 3.3583e-02, PNorm = 129.9842, GNorm = 0.3177, lr_0 = 7.0258e-04
Loss = 3.1327e-02, PNorm = 130.0434, GNorm = 0.2418, lr_0 = 7.0210e-04
Loss = 3.1044e-02, PNorm = 130.0951, GNorm = 0.4801, lr_0 = 7.0162e-04
Loss = 3.4805e-02, PNorm = 130.1457, GNorm = 0.3521, lr_0 = 7.0114e-04
Loss = 3.0988e-02, PNorm = 130.1978, GNorm = 0.2468, lr_0 = 7.0066e-04
Loss = 3.2673e-02, PNorm = 130.2510, GNorm = 0.9099, lr_0 = 7.0018e-04
Loss = 3.3749e-02, PNorm = 130.3073, GNorm = 0.4292, lr_0 = 6.9970e-04
Loss = 3.1555e-02, PNorm = 130.3619, GNorm = 0.2521, lr_0 = 6.9922e-04
Loss = 3.3863e-02, PNorm = 130.4155, GNorm = 0.6745, lr_0 = 6.9874e-04
Loss = 2.9434e-02, PNorm = 130.4654, GNorm = 0.2700, lr_0 = 6.9826e-04
Loss = 3.0750e-02, PNorm = 130.5161, GNorm = 0.5351, lr_0 = 6.9778e-04
Loss = 3.3244e-02, PNorm = 130.5703, GNorm = 0.3049, lr_0 = 6.9730e-04
Loss = 3.3618e-02, PNorm = 130.6348, GNorm = 0.4347, lr_0 = 6.9683e-04
Loss = 3.4378e-02, PNorm = 130.6940, GNorm = 0.4564, lr_0 = 6.9635e-04
Loss = 3.2768e-02, PNorm = 130.7549, GNorm = 0.7014, lr_0 = 6.9587e-04
Loss = 2.7781e-02, PNorm = 130.8094, GNorm = 0.3013, lr_0 = 6.9540e-04
Loss = 2.9008e-02, PNorm = 130.8637, GNorm = 0.6679, lr_0 = 6.9492e-04
Loss = 2.8930e-02, PNorm = 130.9255, GNorm = 0.5914, lr_0 = 6.9444e-04
Loss = 3.5318e-02, PNorm = 130.9819, GNorm = 0.2238, lr_0 = 6.9397e-04
Loss = 3.1365e-02, PNorm = 131.0433, GNorm = 0.6172, lr_0 = 6.9349e-04
Loss = 2.9740e-02, PNorm = 131.1103, GNorm = 0.3646, lr_0 = 6.9302e-04
Loss = 3.1075e-02, PNorm = 131.1667, GNorm = 0.5485, lr_0 = 6.9254e-04
Loss = 2.7443e-02, PNorm = 131.2264, GNorm = 0.3485, lr_0 = 6.9207e-04
Loss = 3.4146e-02, PNorm = 131.2792, GNorm = 0.4546, lr_0 = 6.9159e-04
Loss = 3.2515e-02, PNorm = 131.3343, GNorm = 0.3068, lr_0 = 6.9112e-04
Loss = 3.3399e-02, PNorm = 131.3960, GNorm = 0.3293, lr_0 = 6.9065e-04
Loss = 2.9794e-02, PNorm = 131.4535, GNorm = 0.2455, lr_0 = 6.9017e-04
Loss = 3.1905e-02, PNorm = 131.5217, GNorm = 0.4100, lr_0 = 6.8970e-04
Loss = 3.1080e-02, PNorm = 131.5814, GNorm = 0.2958, lr_0 = 6.8923e-04
Loss = 3.2930e-02, PNorm = 131.6454, GNorm = 0.5102, lr_0 = 6.8876e-04
Loss = 3.3865e-02, PNorm = 131.7017, GNorm = 0.5932, lr_0 = 6.8828e-04
Loss = 3.1319e-02, PNorm = 131.7668, GNorm = 0.2744, lr_0 = 6.8781e-04
Loss = 3.4500e-02, PNorm = 131.8316, GNorm = 0.2557, lr_0 = 6.8734e-04
Loss = 3.0961e-02, PNorm = 131.8978, GNorm = 0.4650, lr_0 = 6.8687e-04
Loss = 3.1214e-02, PNorm = 131.9598, GNorm = 0.6679, lr_0 = 6.8640e-04
Loss = 3.0327e-02, PNorm = 132.0237, GNorm = 0.4052, lr_0 = 6.8593e-04
Loss = 3.6430e-02, PNorm = 132.0865, GNorm = 0.7441, lr_0 = 6.8546e-04
Loss = 3.3345e-02, PNorm = 132.1487, GNorm = 0.3833, lr_0 = 6.8499e-04
Loss = 3.2991e-02, PNorm = 132.2107, GNorm = 0.5350, lr_0 = 6.8452e-04
Loss = 3.0433e-02, PNorm = 132.2831, GNorm = 0.3355, lr_0 = 6.8405e-04
Loss = 3.8078e-02, PNorm = 132.3476, GNorm = 0.5063, lr_0 = 6.8358e-04
Loss = 3.5406e-02, PNorm = 132.4091, GNorm = 0.4894, lr_0 = 6.8312e-04
Loss = 3.8219e-02, PNorm = 132.4760, GNorm = 0.6594, lr_0 = 6.8265e-04
Loss = 3.2415e-02, PNorm = 132.5359, GNorm = 0.2867, lr_0 = 6.8218e-04
Loss = 3.3267e-02, PNorm = 132.5971, GNorm = 0.5801, lr_0 = 6.8171e-04
Loss = 4.3204e-02, PNorm = 132.6627, GNorm = 1.0815, lr_0 = 6.8125e-04
Loss = 3.1229e-02, PNorm = 132.7345, GNorm = 0.2994, lr_0 = 6.8078e-04
Loss = 3.5054e-02, PNorm = 132.7939, GNorm = 0.7076, lr_0 = 6.8031e-04
Loss = 2.8716e-02, PNorm = 132.8569, GNorm = 0.2958, lr_0 = 6.7985e-04
Loss = 3.3148e-02, PNorm = 132.9129, GNorm = 0.3763, lr_0 = 6.7938e-04
Loss = 3.6503e-02, PNorm = 132.9817, GNorm = 0.7666, lr_0 = 6.7892e-04
Loss = 3.9341e-02, PNorm = 133.0464, GNorm = 0.3671, lr_0 = 6.7845e-04
Loss = 3.4680e-02, PNorm = 133.1160, GNorm = 0.2955, lr_0 = 6.7799e-04
Loss = 3.4105e-02, PNorm = 133.1798, GNorm = 0.2396, lr_0 = 6.7752e-04
Loss = 3.3973e-02, PNorm = 133.2423, GNorm = 0.5228, lr_0 = 6.7706e-04
Loss = 4.2399e-02, PNorm = 133.2983, GNorm = 0.3473, lr_0 = 6.7659e-04
Loss = 3.5642e-02, PNorm = 133.3641, GNorm = 0.5379, lr_0 = 6.7613e-04
Loss = 3.9003e-02, PNorm = 133.4319, GNorm = 0.5833, lr_0 = 6.7567e-04
Loss = 3.5756e-02, PNorm = 133.5026, GNorm = 0.2750, lr_0 = 6.7520e-04
Loss = 4.2223e-02, PNorm = 133.5739, GNorm = 0.2254, lr_0 = 6.7474e-04
Loss = 4.1581e-02, PNorm = 133.6431, GNorm = 0.3583, lr_0 = 6.7428e-04
Loss = 3.5675e-02, PNorm = 133.7189, GNorm = 0.7839, lr_0 = 6.7382e-04
Loss = 4.0023e-02, PNorm = 133.7927, GNorm = 0.6450, lr_0 = 6.7335e-04
Loss = 4.6411e-02, PNorm = 133.8676, GNorm = 1.1361, lr_0 = 6.7289e-04
Loss = 3.7403e-02, PNorm = 133.9428, GNorm = 0.3559, lr_0 = 6.7243e-04
Loss = 3.7947e-02, PNorm = 134.0179, GNorm = 0.3094, lr_0 = 6.7197e-04
Loss = 3.5162e-02, PNorm = 134.0888, GNorm = 0.2876, lr_0 = 6.7151e-04
Loss = 4.0584e-02, PNorm = 134.1523, GNorm = 1.1621, lr_0 = 6.7105e-04
Loss = 3.7982e-02, PNorm = 134.2179, GNorm = 0.9666, lr_0 = 6.7059e-04
Loss = 3.6235e-02, PNorm = 134.2940, GNorm = 0.3711, lr_0 = 6.7013e-04
Loss = 3.5416e-02, PNorm = 134.3621, GNorm = 0.3481, lr_0 = 6.6967e-04
Loss = 3.6128e-02, PNorm = 134.4409, GNorm = 0.4241, lr_0 = 6.6921e-04
Loss = 3.7008e-02, PNorm = 134.5094, GNorm = 0.4411, lr_0 = 6.6876e-04
Loss = 3.6600e-02, PNorm = 134.5860, GNorm = 0.5913, lr_0 = 6.6830e-04
Loss = 3.6587e-02, PNorm = 134.6578, GNorm = 0.2882, lr_0 = 6.6784e-04
Loss = 3.3873e-02, PNorm = 134.7302, GNorm = 0.3663, lr_0 = 6.6738e-04
Loss = 3.7083e-02, PNorm = 134.7991, GNorm = 0.5698, lr_0 = 6.6693e-04
Loss = 3.6925e-02, PNorm = 134.8747, GNorm = 0.2161, lr_0 = 6.6647e-04
Loss = 3.3175e-02, PNorm = 134.9521, GNorm = 0.2686, lr_0 = 6.6601e-04
Loss = 3.5267e-02, PNorm = 135.0182, GNorm = 0.4102, lr_0 = 6.6556e-04
Loss = 3.1290e-02, PNorm = 135.0891, GNorm = 0.6784, lr_0 = 6.6510e-04
Loss = 4.1127e-02, PNorm = 135.1607, GNorm = 0.8214, lr_0 = 6.6464e-04
Loss = 3.7690e-02, PNorm = 135.2401, GNorm = 0.8938, lr_0 = 6.6419e-04
Loss = 3.4079e-02, PNorm = 135.3156, GNorm = 0.3637, lr_0 = 6.6373e-04
Loss = 3.6701e-02, PNorm = 135.3955, GNorm = 0.4358, lr_0 = 6.6328e-04
Loss = 4.4264e-02, PNorm = 135.4624, GNorm = 0.5590, lr_0 = 6.6282e-04
Validation mae = 0.287667
Epoch 7
Loss = 3.5024e-02, PNorm = 135.5380, GNorm = 0.4689, lr_0 = 6.6237e-04
Loss = 2.5286e-02, PNorm = 135.5939, GNorm = 0.3435, lr_0 = 6.6192e-04
Loss = 2.4735e-02, PNorm = 135.6413, GNorm = 0.7708, lr_0 = 6.6146e-04
Loss = 3.3872e-02, PNorm = 135.6902, GNorm = 0.3407, lr_0 = 6.6101e-04
Loss = 2.8696e-02, PNorm = 135.7427, GNorm = 0.8513, lr_0 = 6.6056e-04
Loss = 2.9220e-02, PNorm = 135.7914, GNorm = 0.5144, lr_0 = 6.6011e-04
Loss = 2.8661e-02, PNorm = 135.8360, GNorm = 0.2893, lr_0 = 6.5965e-04
Loss = 3.0997e-02, PNorm = 135.8807, GNorm = 0.5975, lr_0 = 6.5920e-04
Loss = 3.3325e-02, PNorm = 135.9343, GNorm = 0.2481, lr_0 = 6.5875e-04
Loss = 3.2789e-02, PNorm = 135.9823, GNorm = 0.5616, lr_0 = 6.5830e-04
Loss = 3.1683e-02, PNorm = 136.0373, GNorm = 0.5024, lr_0 = 6.5785e-04
Loss = 2.3744e-02, PNorm = 136.0830, GNorm = 0.1848, lr_0 = 6.5740e-04
Loss = 3.0129e-02, PNorm = 136.1308, GNorm = 0.5903, lr_0 = 6.5695e-04
Loss = 2.5407e-02, PNorm = 136.1745, GNorm = 0.7481, lr_0 = 6.5650e-04
Loss = 2.6635e-02, PNorm = 136.2236, GNorm = 0.2530, lr_0 = 6.5605e-04
Loss = 2.6832e-02, PNorm = 136.2718, GNorm = 0.1814, lr_0 = 6.5560e-04
Loss = 2.3243e-02, PNorm = 136.3175, GNorm = 0.4165, lr_0 = 6.5515e-04
Loss = 2.5274e-02, PNorm = 136.3642, GNorm = 0.5108, lr_0 = 6.5470e-04
Loss = 2.7108e-02, PNorm = 136.4059, GNorm = 0.6874, lr_0 = 6.5425e-04
Loss = 2.7924e-02, PNorm = 136.4508, GNorm = 0.2637, lr_0 = 6.5380e-04
Loss = 3.1148e-02, PNorm = 136.4982, GNorm = 0.2546, lr_0 = 6.5335e-04
Loss = 2.9115e-02, PNorm = 136.5420, GNorm = 0.3909, lr_0 = 6.5291e-04
Loss = 2.3557e-02, PNorm = 136.5877, GNorm = 0.5165, lr_0 = 6.5246e-04
Loss = 2.4963e-02, PNorm = 136.6360, GNorm = 0.5138, lr_0 = 6.5201e-04
Loss = 2.9835e-02, PNorm = 136.6786, GNorm = 0.4835, lr_0 = 6.5157e-04
Loss = 2.5467e-02, PNorm = 136.7205, GNorm = 0.8123, lr_0 = 6.5112e-04
Loss = 2.2524e-02, PNorm = 136.7637, GNorm = 0.5622, lr_0 = 6.5067e-04
Loss = 2.5000e-02, PNorm = 136.8041, GNorm = 0.2734, lr_0 = 6.5023e-04
Loss = 2.8161e-02, PNorm = 136.8466, GNorm = 0.1572, lr_0 = 6.4978e-04
Loss = 2.9315e-02, PNorm = 136.8942, GNorm = 0.4663, lr_0 = 6.4934e-04
Loss = 2.8122e-02, PNorm = 136.9487, GNorm = 0.2557, lr_0 = 6.4889e-04
Loss = 2.2399e-02, PNorm = 136.9997, GNorm = 0.3840, lr_0 = 6.4845e-04
Loss = 2.6495e-02, PNorm = 137.0512, GNorm = 0.6996, lr_0 = 6.4800e-04
Loss = 2.4737e-02, PNorm = 137.0989, GNorm = 0.2251, lr_0 = 6.4756e-04
Loss = 2.3976e-02, PNorm = 137.1376, GNorm = 0.3887, lr_0 = 6.4712e-04
Loss = 2.6297e-02, PNorm = 137.1844, GNorm = 0.2200, lr_0 = 6.4667e-04
Loss = 2.6242e-02, PNorm = 137.2353, GNorm = 0.4461, lr_0 = 6.4623e-04
Loss = 2.7265e-02, PNorm = 137.2917, GNorm = 0.4275, lr_0 = 6.4579e-04
Loss = 2.7277e-02, PNorm = 137.3482, GNorm = 0.4083, lr_0 = 6.4534e-04
Loss = 2.5042e-02, PNorm = 137.3943, GNorm = 0.6014, lr_0 = 6.4490e-04
Loss = 2.8267e-02, PNorm = 137.4469, GNorm = 0.2429, lr_0 = 6.4446e-04
Loss = 3.1353e-02, PNorm = 137.4992, GNorm = 0.3418, lr_0 = 6.4402e-04
Loss = 2.3573e-02, PNorm = 137.5475, GNorm = 0.2177, lr_0 = 6.4358e-04
Loss = 3.1901e-02, PNorm = 137.5935, GNorm = 0.7254, lr_0 = 6.4314e-04
Loss = 2.1681e-02, PNorm = 137.6394, GNorm = 0.1905, lr_0 = 6.4270e-04
Loss = 2.6696e-02, PNorm = 137.6923, GNorm = 0.4428, lr_0 = 6.4226e-04
Loss = 2.5558e-02, PNorm = 137.7450, GNorm = 0.3981, lr_0 = 6.4182e-04
Loss = 2.3709e-02, PNorm = 137.7863, GNorm = 0.4457, lr_0 = 6.4138e-04
Loss = 2.7989e-02, PNorm = 137.8277, GNorm = 0.1994, lr_0 = 6.4094e-04
Loss = 2.5817e-02, PNorm = 137.8694, GNorm = 0.2860, lr_0 = 6.4050e-04
Loss = 2.8458e-02, PNorm = 137.9221, GNorm = 0.6140, lr_0 = 6.4006e-04
Loss = 2.5199e-02, PNorm = 137.9714, GNorm = 0.5818, lr_0 = 6.3962e-04
Loss = 3.1274e-02, PNorm = 138.0300, GNorm = 0.3855, lr_0 = 6.3918e-04
Loss = 2.7740e-02, PNorm = 138.0801, GNorm = 0.2753, lr_0 = 6.3874e-04
Loss = 2.5951e-02, PNorm = 138.1382, GNorm = 0.1950, lr_0 = 6.3831e-04
Loss = 2.6081e-02, PNorm = 138.1892, GNorm = 0.3909, lr_0 = 6.3787e-04
Loss = 2.5997e-02, PNorm = 138.2406, GNorm = 0.2754, lr_0 = 6.3743e-04
Loss = 2.7233e-02, PNorm = 138.2863, GNorm = 0.8282, lr_0 = 6.3700e-04
Loss = 2.7400e-02, PNorm = 138.3341, GNorm = 0.1587, lr_0 = 6.3656e-04
Loss = 3.1033e-02, PNorm = 138.3766, GNorm = 0.3924, lr_0 = 6.3612e-04
Loss = 2.9452e-02, PNorm = 138.4298, GNorm = 0.2553, lr_0 = 6.3569e-04
Loss = 2.9875e-02, PNorm = 138.4839, GNorm = 0.2975, lr_0 = 6.3525e-04
Loss = 2.6565e-02, PNorm = 138.5366, GNorm = 0.5979, lr_0 = 6.3482e-04
Loss = 2.8030e-02, PNorm = 138.5941, GNorm = 0.5119, lr_0 = 6.3438e-04
Loss = 2.7275e-02, PNorm = 138.6495, GNorm = 0.4126, lr_0 = 6.3395e-04
Loss = 2.4018e-02, PNorm = 138.7076, GNorm = 0.4236, lr_0 = 6.3351e-04
Loss = 2.6923e-02, PNorm = 138.7648, GNorm = 0.4115, lr_0 = 6.3308e-04
Loss = 2.6486e-02, PNorm = 138.8160, GNorm = 0.2383, lr_0 = 6.3265e-04
Loss = 2.4446e-02, PNorm = 138.8639, GNorm = 0.3752, lr_0 = 6.3221e-04
Loss = 2.4798e-02, PNorm = 138.9159, GNorm = 0.3474, lr_0 = 6.3178e-04
Loss = 2.5477e-02, PNorm = 138.9665, GNorm = 0.7698, lr_0 = 6.3135e-04
Loss = 2.5952e-02, PNorm = 139.0125, GNorm = 0.2636, lr_0 = 6.3091e-04
Loss = 2.4410e-02, PNorm = 139.0583, GNorm = 0.3755, lr_0 = 6.3048e-04
Loss = 2.7456e-02, PNorm = 139.1079, GNorm = 0.3405, lr_0 = 6.3005e-04
Loss = 2.8079e-02, PNorm = 139.1595, GNorm = 0.1558, lr_0 = 6.2962e-04
Loss = 2.9831e-02, PNorm = 139.2186, GNorm = 0.5441, lr_0 = 6.2919e-04
Loss = 3.2498e-02, PNorm = 139.2706, GNorm = 0.5233, lr_0 = 6.2876e-04
Loss = 2.4992e-02, PNorm = 139.3264, GNorm = 0.3757, lr_0 = 6.2833e-04
Loss = 2.7416e-02, PNorm = 139.3777, GNorm = 0.2916, lr_0 = 6.2789e-04
Loss = 2.5079e-02, PNorm = 139.4389, GNorm = 0.2021, lr_0 = 6.2746e-04
Loss = 2.7563e-02, PNorm = 139.4900, GNorm = 0.2523, lr_0 = 6.2703e-04
Loss = 2.4726e-02, PNorm = 139.5460, GNorm = 0.3719, lr_0 = 6.2661e-04
Loss = 2.6347e-02, PNorm = 139.5939, GNorm = 0.2204, lr_0 = 6.2618e-04
Loss = 3.1676e-02, PNorm = 139.6450, GNorm = 0.2231, lr_0 = 6.2575e-04
Loss = 2.4526e-02, PNorm = 139.7032, GNorm = 0.5072, lr_0 = 6.2532e-04
Loss = 3.0915e-02, PNorm = 139.7662, GNorm = 0.2806, lr_0 = 6.2489e-04
Loss = 2.3120e-02, PNorm = 139.8296, GNorm = 0.2530, lr_0 = 6.2446e-04
Loss = 2.2738e-02, PNorm = 139.8828, GNorm = 0.2523, lr_0 = 6.2403e-04
Loss = 2.4627e-02, PNorm = 139.9406, GNorm = 0.8292, lr_0 = 6.2361e-04
Loss = 2.3429e-02, PNorm = 139.9949, GNorm = 0.3195, lr_0 = 6.2318e-04
Loss = 2.3905e-02, PNorm = 140.0514, GNorm = 0.2625, lr_0 = 6.2275e-04
Loss = 2.8824e-02, PNorm = 140.1035, GNorm = 0.4693, lr_0 = 6.2233e-04
Loss = 3.0493e-02, PNorm = 140.1658, GNorm = 0.2942, lr_0 = 6.2190e-04
Loss = 2.8576e-02, PNorm = 140.2219, GNorm = 0.2060, lr_0 = 6.2147e-04
Loss = 2.7054e-02, PNorm = 140.2801, GNorm = 0.3496, lr_0 = 6.2105e-04
Loss = 2.7495e-02, PNorm = 140.3376, GNorm = 0.4991, lr_0 = 6.2062e-04
Loss = 2.7373e-02, PNorm = 140.3901, GNorm = 0.7993, lr_0 = 6.2020e-04
Loss = 2.9252e-02, PNorm = 140.4477, GNorm = 0.3670, lr_0 = 6.1977e-04
Loss = 2.3207e-02, PNorm = 140.5053, GNorm = 0.4085, lr_0 = 6.1935e-04
Loss = 2.4698e-02, PNorm = 140.5634, GNorm = 0.5148, lr_0 = 6.1892e-04
Loss = 3.0979e-02, PNorm = 140.6110, GNorm = 0.4089, lr_0 = 6.1850e-04
Loss = 2.4693e-02, PNorm = 140.6694, GNorm = 0.8723, lr_0 = 6.1808e-04
Loss = 2.8560e-02, PNorm = 140.7204, GNorm = 0.3479, lr_0 = 6.1765e-04
Loss = 2.7147e-02, PNorm = 140.7792, GNorm = 0.2212, lr_0 = 6.1723e-04
Loss = 2.7592e-02, PNorm = 140.8404, GNorm = 0.2963, lr_0 = 6.1681e-04
Loss = 2.4416e-02, PNorm = 140.9012, GNorm = 0.4748, lr_0 = 6.1638e-04
Loss = 2.7315e-02, PNorm = 140.9555, GNorm = 0.6778, lr_0 = 6.1596e-04
Loss = 2.5019e-02, PNorm = 141.0091, GNorm = 0.5062, lr_0 = 6.1554e-04
Loss = 2.8083e-02, PNorm = 141.0615, GNorm = 0.7116, lr_0 = 6.1512e-04
Loss = 2.6241e-02, PNorm = 141.1255, GNorm = 0.2674, lr_0 = 6.1470e-04
Loss = 2.6931e-02, PNorm = 141.1826, GNorm = 0.3693, lr_0 = 6.1428e-04
Loss = 3.1140e-02, PNorm = 141.2362, GNorm = 1.0973, lr_0 = 6.1385e-04
Loss = 2.6515e-02, PNorm = 141.2935, GNorm = 0.3648, lr_0 = 6.1343e-04
Loss = 3.5528e-02, PNorm = 141.3548, GNorm = 0.4178, lr_0 = 6.1301e-04
Loss = 2.9634e-02, PNorm = 141.4216, GNorm = 0.4526, lr_0 = 6.1259e-04
Loss = 2.4453e-02, PNorm = 141.4803, GNorm = 0.4598, lr_0 = 6.1217e-04
Loss = 3.1357e-02, PNorm = 141.5387, GNorm = 0.2258, lr_0 = 6.1175e-04
Loss = 2.9948e-02, PNorm = 141.5954, GNorm = 0.4362, lr_0 = 6.1134e-04
Loss = 2.4880e-02, PNorm = 141.6502, GNorm = 0.2567, lr_0 = 6.1092e-04
Loss = 2.5958e-02, PNorm = 141.7051, GNorm = 0.4467, lr_0 = 6.1050e-04
Validation mae = 0.285295
Epoch 8
Loss = 2.4272e-02, PNorm = 141.7487, GNorm = 0.3548, lr_0 = 6.1008e-04
Loss = 2.1011e-02, PNorm = 141.7927, GNorm = 0.2069, lr_0 = 6.0966e-04
Loss = 2.0798e-02, PNorm = 141.8324, GNorm = 0.4674, lr_0 = 6.0924e-04
Loss = 2.0935e-02, PNorm = 141.8747, GNorm = 0.3202, lr_0 = 6.0883e-04
Loss = 2.3181e-02, PNorm = 141.9101, GNorm = 0.4889, lr_0 = 6.0841e-04
Loss = 2.1278e-02, PNorm = 141.9434, GNorm = 0.6030, lr_0 = 6.0799e-04
Loss = 2.3071e-02, PNorm = 141.9812, GNorm = 0.4097, lr_0 = 6.0758e-04
Loss = 2.0348e-02, PNorm = 142.0216, GNorm = 0.6190, lr_0 = 6.0716e-04
Loss = 2.1814e-02, PNorm = 142.0618, GNorm = 0.4145, lr_0 = 6.0674e-04
Loss = 2.0565e-02, PNorm = 142.1029, GNorm = 0.4615, lr_0 = 6.0633e-04
Loss = 1.8891e-02, PNorm = 142.1322, GNorm = 0.1424, lr_0 = 6.0591e-04
Loss = 1.8667e-02, PNorm = 142.1724, GNorm = 0.4501, lr_0 = 6.0550e-04
Loss = 1.8044e-02, PNorm = 142.2144, GNorm = 0.1329, lr_0 = 6.0508e-04
Loss = 2.7977e-02, PNorm = 142.2520, GNorm = 0.5655, lr_0 = 6.0467e-04
Loss = 2.2263e-02, PNorm = 142.2965, GNorm = 0.3817, lr_0 = 6.0425e-04
Loss = 2.2959e-02, PNorm = 142.3330, GNorm = 0.5805, lr_0 = 6.0384e-04
Loss = 2.1188e-02, PNorm = 142.3719, GNorm = 0.2067, lr_0 = 6.0343e-04
Loss = 1.8766e-02, PNorm = 142.4136, GNorm = 0.5368, lr_0 = 6.0301e-04
Loss = 1.7519e-02, PNorm = 142.4542, GNorm = 0.2962, lr_0 = 6.0260e-04
Loss = 2.2338e-02, PNorm = 142.4909, GNorm = 0.6309, lr_0 = 6.0219e-04
Loss = 2.0712e-02, PNorm = 142.5298, GNorm = 0.2775, lr_0 = 6.0178e-04
Loss = 1.7257e-02, PNorm = 142.5683, GNorm = 0.3089, lr_0 = 6.0136e-04
Loss = 1.7810e-02, PNorm = 142.6004, GNorm = 0.5227, lr_0 = 6.0095e-04
Loss = 1.8854e-02, PNorm = 142.6302, GNorm = 0.3165, lr_0 = 6.0054e-04
Loss = 2.1693e-02, PNorm = 142.6580, GNorm = 0.6716, lr_0 = 6.0013e-04
Loss = 1.7616e-02, PNorm = 142.6966, GNorm = 0.4295, lr_0 = 5.9972e-04
Loss = 2.2409e-02, PNorm = 142.7335, GNorm = 0.3499, lr_0 = 5.9931e-04
Loss = 2.0603e-02, PNorm = 142.7730, GNorm = 0.1349, lr_0 = 5.9890e-04
Loss = 2.1020e-02, PNorm = 142.8081, GNorm = 0.1904, lr_0 = 5.9849e-04
Loss = 2.3092e-02, PNorm = 142.8405, GNorm = 0.6298, lr_0 = 5.9808e-04
Loss = 2.2507e-02, PNorm = 142.8758, GNorm = 0.4854, lr_0 = 5.9767e-04
Loss = 1.8308e-02, PNorm = 142.9174, GNorm = 0.4973, lr_0 = 5.9726e-04
Loss = 1.9182e-02, PNorm = 142.9586, GNorm = 0.5613, lr_0 = 5.9685e-04
Loss = 2.1211e-02, PNorm = 142.9981, GNorm = 0.6922, lr_0 = 5.9644e-04
Loss = 2.0166e-02, PNorm = 143.0374, GNorm = 0.1149, lr_0 = 5.9603e-04
Loss = 2.2205e-02, PNorm = 143.0745, GNorm = 0.1932, lr_0 = 5.9562e-04
Loss = 1.8753e-02, PNorm = 143.1175, GNorm = 0.2787, lr_0 = 5.9521e-04
Loss = 2.1975e-02, PNorm = 143.1573, GNorm = 0.3185, lr_0 = 5.9481e-04
Loss = 2.0348e-02, PNorm = 143.2004, GNorm = 0.2621, lr_0 = 5.9440e-04
Loss = 1.8616e-02, PNorm = 143.2300, GNorm = 0.2527, lr_0 = 5.9399e-04
Loss = 2.2270e-02, PNorm = 143.2647, GNorm = 0.8300, lr_0 = 5.9358e-04
Loss = 1.9308e-02, PNorm = 143.2966, GNorm = 0.1655, lr_0 = 5.9318e-04
Loss = 2.2882e-02, PNorm = 143.3267, GNorm = 0.5329, lr_0 = 5.9277e-04
Loss = 2.1350e-02, PNorm = 143.3698, GNorm = 0.1919, lr_0 = 5.9236e-04
Loss = 1.9463e-02, PNorm = 143.4158, GNorm = 0.7547, lr_0 = 5.9196e-04
Loss = 1.6323e-02, PNorm = 143.4604, GNorm = 0.2456, lr_0 = 5.9155e-04
Loss = 2.1646e-02, PNorm = 143.5004, GNorm = 0.3239, lr_0 = 5.9115e-04
Loss = 2.4863e-02, PNorm = 143.5372, GNorm = 1.0975, lr_0 = 5.9074e-04
Loss = 2.0465e-02, PNorm = 143.5804, GNorm = 0.2732, lr_0 = 5.9034e-04
Loss = 1.7471e-02, PNorm = 143.6239, GNorm = 0.3634, lr_0 = 5.8993e-04
Loss = 2.1901e-02, PNorm = 143.6577, GNorm = 0.5042, lr_0 = 5.8953e-04
Loss = 2.1689e-02, PNorm = 143.6970, GNorm = 0.3714, lr_0 = 5.8913e-04
Loss = 1.9040e-02, PNorm = 143.7358, GNorm = 0.5417, lr_0 = 5.8872e-04
Loss = 1.8332e-02, PNorm = 143.7801, GNorm = 0.6413, lr_0 = 5.8832e-04
Loss = 2.1331e-02, PNorm = 143.8212, GNorm = 0.1747, lr_0 = 5.8792e-04
Loss = 2.3516e-02, PNorm = 143.8581, GNorm = 0.8674, lr_0 = 5.8751e-04
Loss = 2.2302e-02, PNorm = 143.8951, GNorm = 0.3718, lr_0 = 5.8711e-04
Loss = 2.2744e-02, PNorm = 143.9433, GNorm = 0.3110, lr_0 = 5.8671e-04
Loss = 2.0809e-02, PNorm = 143.9896, GNorm = 0.9171, lr_0 = 5.8631e-04
Loss = 2.0956e-02, PNorm = 144.0339, GNorm = 0.2650, lr_0 = 5.8591e-04
Loss = 2.4253e-02, PNorm = 144.0790, GNorm = 0.2402, lr_0 = 5.8550e-04
Loss = 2.0375e-02, PNorm = 144.1239, GNorm = 0.6021, lr_0 = 5.8510e-04
Loss = 2.3053e-02, PNorm = 144.1693, GNorm = 0.3609, lr_0 = 5.8470e-04
Loss = 2.0561e-02, PNorm = 144.2136, GNorm = 0.2509, lr_0 = 5.8430e-04
Loss = 1.9537e-02, PNorm = 144.2579, GNorm = 0.1764, lr_0 = 5.8390e-04
Loss = 1.9605e-02, PNorm = 144.3014, GNorm = 0.3042, lr_0 = 5.8350e-04
Loss = 1.8128e-02, PNorm = 144.3471, GNorm = 0.4308, lr_0 = 5.8310e-04
Loss = 1.8389e-02, PNorm = 144.3932, GNorm = 0.3376, lr_0 = 5.8270e-04
Loss = 1.8846e-02, PNorm = 144.4386, GNorm = 0.4501, lr_0 = 5.8230e-04
Loss = 1.6133e-02, PNorm = 144.4755, GNorm = 0.3200, lr_0 = 5.8190e-04
Loss = 2.8302e-02, PNorm = 144.5034, GNorm = 0.1387, lr_0 = 5.8151e-04
Loss = 2.1464e-02, PNorm = 144.5421, GNorm = 0.6307, lr_0 = 5.8111e-04
Loss = 1.8839e-02, PNorm = 144.5894, GNorm = 0.4973, lr_0 = 5.8071e-04
Loss = 2.9134e-02, PNorm = 144.6329, GNorm = 0.6632, lr_0 = 5.8031e-04
Loss = 2.3811e-02, PNorm = 144.6792, GNorm = 0.4673, lr_0 = 5.7991e-04
Loss = 2.0271e-02, PNorm = 144.7309, GNorm = 0.1483, lr_0 = 5.7952e-04
Loss = 2.6370e-02, PNorm = 144.7881, GNorm = 0.3480, lr_0 = 5.7912e-04
Loss = 2.5753e-02, PNorm = 144.8349, GNorm = 0.3228, lr_0 = 5.7872e-04
Loss = 2.5639e-02, PNorm = 144.8876, GNorm = 0.6906, lr_0 = 5.7833e-04
Loss = 2.1870e-02, PNorm = 144.9379, GNorm = 0.2088, lr_0 = 5.7793e-04
Loss = 2.2489e-02, PNorm = 144.9827, GNorm = 0.4450, lr_0 = 5.7753e-04
Loss = 1.8102e-02, PNorm = 145.0332, GNorm = 1.0235, lr_0 = 5.7714e-04
Loss = 2.1723e-02, PNorm = 145.0844, GNorm = 0.7969, lr_0 = 5.7674e-04
Loss = 2.3918e-02, PNorm = 145.1239, GNorm = 0.5677, lr_0 = 5.7635e-04
Loss = 2.2063e-02, PNorm = 145.1768, GNorm = 0.8494, lr_0 = 5.7595e-04
Loss = 1.8196e-02, PNorm = 145.2254, GNorm = 0.3733, lr_0 = 5.7556e-04
Loss = 2.0024e-02, PNorm = 145.2724, GNorm = 0.6111, lr_0 = 5.7516e-04
Loss = 2.2806e-02, PNorm = 145.3215, GNorm = 0.2399, lr_0 = 5.7477e-04
Loss = 2.2617e-02, PNorm = 145.3735, GNorm = 0.3924, lr_0 = 5.7438e-04
Loss = 2.1807e-02, PNorm = 145.4290, GNorm = 0.9880, lr_0 = 5.7398e-04
Loss = 2.0556e-02, PNorm = 145.4760, GNorm = 0.1741, lr_0 = 5.7359e-04
Loss = 2.1994e-02, PNorm = 145.5237, GNorm = 0.5777, lr_0 = 5.7320e-04
Loss = 2.1364e-02, PNorm = 145.5744, GNorm = 0.4950, lr_0 = 5.7280e-04
Loss = 2.0570e-02, PNorm = 145.6228, GNorm = 0.3820, lr_0 = 5.7241e-04
Loss = 2.7142e-02, PNorm = 145.6722, GNorm = 0.3704, lr_0 = 5.7202e-04
Loss = 1.9219e-02, PNorm = 145.7205, GNorm = 0.3257, lr_0 = 5.7163e-04
Loss = 2.0789e-02, PNorm = 145.7700, GNorm = 0.3108, lr_0 = 5.7124e-04
Loss = 2.1516e-02, PNorm = 145.8227, GNorm = 0.3354, lr_0 = 5.7084e-04
Loss = 2.3248e-02, PNorm = 145.8683, GNorm = 0.4909, lr_0 = 5.7045e-04
Loss = 2.9643e-02, PNorm = 145.9134, GNorm = 0.4310, lr_0 = 5.7006e-04
Loss = 2.1749e-02, PNorm = 145.9667, GNorm = 0.3364, lr_0 = 5.6967e-04
Loss = 2.3796e-02, PNorm = 146.0232, GNorm = 0.3329, lr_0 = 5.6928e-04
Loss = 2.3035e-02, PNorm = 146.0793, GNorm = 0.3041, lr_0 = 5.6889e-04
Loss = 2.3587e-02, PNorm = 146.1313, GNorm = 0.5875, lr_0 = 5.6850e-04
Loss = 2.3198e-02, PNorm = 146.1808, GNorm = 0.6278, lr_0 = 5.6811e-04
Loss = 2.1250e-02, PNorm = 146.2321, GNorm = 0.7795, lr_0 = 5.6772e-04
Loss = 1.7448e-02, PNorm = 146.2818, GNorm = 0.1310, lr_0 = 5.6733e-04
Loss = 2.7599e-02, PNorm = 146.3304, GNorm = 0.5456, lr_0 = 5.6695e-04
Loss = 2.2747e-02, PNorm = 146.3809, GNorm = 0.3767, lr_0 = 5.6656e-04
Loss = 2.4084e-02, PNorm = 146.4325, GNorm = 0.5616, lr_0 = 5.6617e-04
Loss = 2.5986e-02, PNorm = 146.4843, GNorm = 0.4073, lr_0 = 5.6578e-04
Loss = 2.1243e-02, PNorm = 146.5359, GNorm = 0.3524, lr_0 = 5.6539e-04
Loss = 2.0476e-02, PNorm = 146.5865, GNorm = 0.2332, lr_0 = 5.6501e-04
Loss = 1.9797e-02, PNorm = 146.6392, GNorm = 0.4399, lr_0 = 5.6462e-04
Loss = 2.5722e-02, PNorm = 146.6836, GNorm = 0.3417, lr_0 = 5.6423e-04
Loss = 2.3497e-02, PNorm = 146.7359, GNorm = 0.5355, lr_0 = 5.6385e-04
Loss = 2.6478e-02, PNorm = 146.7813, GNorm = 0.1622, lr_0 = 5.6346e-04
Loss = 2.0262e-02, PNorm = 146.8311, GNorm = 0.2579, lr_0 = 5.6307e-04
Loss = 2.7465e-02, PNorm = 146.8778, GNorm = 0.4338, lr_0 = 5.6269e-04
Loss = 2.2849e-02, PNorm = 146.9312, GNorm = 0.4786, lr_0 = 5.6230e-04
Validation mae = 0.284177
Epoch 9
Loss = 1.7766e-02, PNorm = 146.9725, GNorm = 0.2405, lr_0 = 5.6192e-04
Loss = 1.9984e-02, PNorm = 147.0138, GNorm = 0.7078, lr_0 = 5.6153e-04
Loss = 1.9341e-02, PNorm = 147.0464, GNorm = 0.3275, lr_0 = 5.6115e-04
Loss = 2.1280e-02, PNorm = 147.0798, GNorm = 0.1911, lr_0 = 5.6076e-04
Loss = 1.9233e-02, PNorm = 147.1102, GNorm = 0.7076, lr_0 = 5.6038e-04
Loss = 1.6232e-02, PNorm = 147.1437, GNorm = 0.4890, lr_0 = 5.6000e-04
Loss = 1.6352e-02, PNorm = 147.1743, GNorm = 0.5704, lr_0 = 5.5961e-04
Loss = 1.4961e-02, PNorm = 147.2038, GNorm = 0.1794, lr_0 = 5.5923e-04
Loss = 1.9433e-02, PNorm = 147.2318, GNorm = 0.2221, lr_0 = 5.5885e-04
Loss = 1.7456e-02, PNorm = 147.2635, GNorm = 0.3224, lr_0 = 5.5846e-04
Loss = 1.6541e-02, PNorm = 147.3004, GNorm = 0.6827, lr_0 = 5.5808e-04
Loss = 2.0056e-02, PNorm = 147.3301, GNorm = 0.3404, lr_0 = 5.5770e-04
Loss = 1.7053e-02, PNorm = 147.3656, GNorm = 0.8493, lr_0 = 5.5732e-04
Loss = 1.7319e-02, PNorm = 147.3994, GNorm = 0.3992, lr_0 = 5.5693e-04
Loss = 1.6215e-02, PNorm = 147.4341, GNorm = 0.2925, lr_0 = 5.5655e-04
Loss = 1.7698e-02, PNorm = 147.4661, GNorm = 1.3290, lr_0 = 5.5617e-04
Loss = 1.8642e-02, PNorm = 147.4993, GNorm = 0.2685, lr_0 = 5.5579e-04
Loss = 1.7585e-02, PNorm = 147.5295, GNorm = 0.5428, lr_0 = 5.5541e-04
Loss = 1.6704e-02, PNorm = 147.5605, GNorm = 0.1428, lr_0 = 5.5503e-04
Loss = 1.7056e-02, PNorm = 147.5926, GNorm = 0.3085, lr_0 = 5.5465e-04
Loss = 1.6285e-02, PNorm = 147.6249, GNorm = 0.3832, lr_0 = 5.5427e-04
Loss = 1.4546e-02, PNorm = 147.6559, GNorm = 0.3681, lr_0 = 5.5389e-04
Loss = 1.8603e-02, PNorm = 147.6858, GNorm = 0.2813, lr_0 = 5.5351e-04
Loss = 1.6519e-02, PNorm = 147.7205, GNorm = 0.5730, lr_0 = 5.5313e-04
Loss = 1.7903e-02, PNorm = 147.7512, GNorm = 0.3238, lr_0 = 5.5275e-04
Loss = 1.5157e-02, PNorm = 147.7823, GNorm = 0.2373, lr_0 = 5.5237e-04
Loss = 1.6175e-02, PNorm = 147.8166, GNorm = 0.2990, lr_0 = 5.5199e-04
Loss = 1.8290e-02, PNorm = 147.8488, GNorm = 0.6292, lr_0 = 5.5162e-04
Loss = 1.6764e-02, PNorm = 147.8855, GNorm = 0.3194, lr_0 = 5.5124e-04
Loss = 1.6043e-02, PNorm = 147.9170, GNorm = 0.3809, lr_0 = 5.5086e-04
Loss = 1.6831e-02, PNorm = 147.9496, GNorm = 0.5270, lr_0 = 5.5048e-04
Loss = 1.5679e-02, PNorm = 147.9846, GNorm = 0.5297, lr_0 = 5.5011e-04
Loss = 1.6995e-02, PNorm = 148.0161, GNorm = 0.2634, lr_0 = 5.4973e-04
Loss = 1.7032e-02, PNorm = 148.0488, GNorm = 0.3727, lr_0 = 5.4935e-04
Loss = 1.6257e-02, PNorm = 148.0805, GNorm = 0.4302, lr_0 = 5.4898e-04
Loss = 1.6115e-02, PNorm = 148.1119, GNorm = 0.4284, lr_0 = 5.4860e-04
Loss = 1.6416e-02, PNorm = 148.1430, GNorm = 0.3565, lr_0 = 5.4822e-04
Loss = 1.5475e-02, PNorm = 148.1775, GNorm = 0.3742, lr_0 = 5.4785e-04
Loss = 1.8490e-02, PNorm = 148.2138, GNorm = 0.1987, lr_0 = 5.4747e-04
Loss = 1.5884e-02, PNorm = 148.2492, GNorm = 0.4258, lr_0 = 5.4710e-04
Loss = 1.6668e-02, PNorm = 148.2831, GNorm = 0.4442, lr_0 = 5.4672e-04
Loss = 1.3764e-02, PNorm = 148.3171, GNorm = 0.1949, lr_0 = 5.4635e-04
Loss = 1.8922e-02, PNorm = 148.3450, GNorm = 0.2975, lr_0 = 5.4597e-04
Loss = 1.6185e-02, PNorm = 148.3743, GNorm = 0.2965, lr_0 = 5.4560e-04
Loss = 1.5712e-02, PNorm = 148.4076, GNorm = 0.1497, lr_0 = 5.4523e-04
Loss = 1.6743e-02, PNorm = 148.4432, GNorm = 0.2837, lr_0 = 5.4485e-04
Loss = 1.5804e-02, PNorm = 148.4851, GNorm = 0.1988, lr_0 = 5.4448e-04
Loss = 1.7928e-02, PNorm = 148.5252, GNorm = 0.3876, lr_0 = 5.4411e-04
Loss = 2.2626e-02, PNorm = 148.5691, GNorm = 0.6617, lr_0 = 5.4373e-04
Loss = 2.0627e-02, PNorm = 148.6087, GNorm = 0.3277, lr_0 = 5.4336e-04
Loss = 1.8296e-02, PNorm = 148.6501, GNorm = 0.7807, lr_0 = 5.4299e-04
Loss = 1.3573e-02, PNorm = 148.6889, GNorm = 0.3468, lr_0 = 5.4262e-04
Loss = 1.4728e-02, PNorm = 148.7226, GNorm = 0.6037, lr_0 = 5.4225e-04
Loss = 2.0458e-02, PNorm = 148.7596, GNorm = 0.2304, lr_0 = 5.4187e-04
Loss = 1.7603e-02, PNorm = 148.7994, GNorm = 0.1216, lr_0 = 5.4150e-04
Loss = 1.6488e-02, PNorm = 148.8405, GNorm = 0.3230, lr_0 = 5.4113e-04
Loss = 1.5343e-02, PNorm = 148.8777, GNorm = 0.1335, lr_0 = 5.4076e-04
Loss = 1.6958e-02, PNorm = 148.9124, GNorm = 0.6390, lr_0 = 5.4039e-04
Loss = 1.6085e-02, PNorm = 148.9494, GNorm = 0.2551, lr_0 = 5.4002e-04
Loss = 1.5223e-02, PNorm = 148.9864, GNorm = 0.1961, lr_0 = 5.3965e-04
Loss = 1.4823e-02, PNorm = 149.0218, GNorm = 0.3190, lr_0 = 5.3928e-04
Loss = 1.7583e-02, PNorm = 149.0568, GNorm = 0.1163, lr_0 = 5.3891e-04
Loss = 1.8562e-02, PNorm = 149.0873, GNorm = 0.3171, lr_0 = 5.3854e-04
Loss = 2.0364e-02, PNorm = 149.1217, GNorm = 0.2574, lr_0 = 5.3817e-04
Loss = 1.6132e-02, PNorm = 149.1631, GNorm = 0.4132, lr_0 = 5.3781e-04
Loss = 1.7106e-02, PNorm = 149.2009, GNorm = 0.8480, lr_0 = 5.3744e-04
Loss = 1.5270e-02, PNorm = 149.2411, GNorm = 0.1490, lr_0 = 5.3707e-04
Loss = 1.6375e-02, PNorm = 149.2774, GNorm = 0.7131, lr_0 = 5.3670e-04
Loss = 1.5698e-02, PNorm = 149.3174, GNorm = 0.1399, lr_0 = 5.3633e-04
Loss = 1.6971e-02, PNorm = 149.3595, GNorm = 0.2851, lr_0 = 5.3597e-04
Loss = 1.5681e-02, PNorm = 149.3956, GNorm = 0.3315, lr_0 = 5.3560e-04
Loss = 1.6881e-02, PNorm = 149.4345, GNorm = 0.6925, lr_0 = 5.3523e-04
Loss = 1.3385e-02, PNorm = 149.4721, GNorm = 0.1847, lr_0 = 5.3486e-04
Loss = 1.9946e-02, PNorm = 149.5060, GNorm = 0.3185, lr_0 = 5.3450e-04
Loss = 1.9642e-02, PNorm = 149.5415, GNorm = 0.1599, lr_0 = 5.3413e-04
Loss = 2.2000e-02, PNorm = 149.5830, GNorm = 0.3188, lr_0 = 5.3377e-04
Loss = 2.0260e-02, PNorm = 149.6277, GNorm = 0.3202, lr_0 = 5.3340e-04
Loss = 1.5796e-02, PNorm = 149.6693, GNorm = 0.4471, lr_0 = 5.3304e-04
Loss = 1.9376e-02, PNorm = 149.7112, GNorm = 0.2496, lr_0 = 5.3267e-04
Loss = 2.1727e-02, PNorm = 149.7626, GNorm = 0.9002, lr_0 = 5.3231e-04
Loss = 1.9280e-02, PNorm = 149.8075, GNorm = 0.3724, lr_0 = 5.3194e-04
Loss = 1.5935e-02, PNorm = 149.8425, GNorm = 0.3468, lr_0 = 5.3158e-04
Loss = 1.5692e-02, PNorm = 149.8833, GNorm = 0.1991, lr_0 = 5.3121e-04
Loss = 1.8255e-02, PNorm = 149.9229, GNorm = 0.1925, lr_0 = 5.3085e-04
Loss = 1.7629e-02, PNorm = 149.9614, GNorm = 0.5163, lr_0 = 5.3048e-04
Loss = 2.1653e-02, PNorm = 149.9984, GNorm = 0.2802, lr_0 = 5.3012e-04
Loss = 1.7315e-02, PNorm = 150.0359, GNorm = 0.3241, lr_0 = 5.2976e-04
Loss = 1.6005e-02, PNorm = 150.0796, GNorm = 0.2324, lr_0 = 5.2939e-04
Loss = 1.6781e-02, PNorm = 150.1140, GNorm = 0.1709, lr_0 = 5.2903e-04
Loss = 1.6424e-02, PNorm = 150.1529, GNorm = 0.5539, lr_0 = 5.2867e-04
Loss = 1.5797e-02, PNorm = 150.1907, GNorm = 0.2386, lr_0 = 5.2831e-04
Loss = 2.2081e-02, PNorm = 150.2233, GNorm = 0.4789, lr_0 = 5.2795e-04
Loss = 1.4790e-02, PNorm = 150.2645, GNorm = 0.4689, lr_0 = 5.2758e-04
Loss = 1.8307e-02, PNorm = 150.3032, GNorm = 0.2145, lr_0 = 5.2722e-04
Loss = 1.3588e-02, PNorm = 150.3423, GNorm = 0.2716, lr_0 = 5.2686e-04
Loss = 1.5996e-02, PNorm = 150.3827, GNorm = 0.3791, lr_0 = 5.2650e-04
Loss = 1.8075e-02, PNorm = 150.4258, GNorm = 0.3216, lr_0 = 5.2614e-04
Loss = 1.6324e-02, PNorm = 150.4636, GNorm = 0.1799, lr_0 = 5.2578e-04
Loss = 1.5853e-02, PNorm = 150.4959, GNorm = 0.1771, lr_0 = 5.2542e-04
Loss = 2.1400e-02, PNorm = 150.5303, GNorm = 0.8244, lr_0 = 5.2506e-04
Loss = 1.4842e-02, PNorm = 150.5781, GNorm = 0.1665, lr_0 = 5.2470e-04
Loss = 1.8985e-02, PNorm = 150.6151, GNorm = 0.3703, lr_0 = 5.2434e-04
Loss = 1.5456e-02, PNorm = 150.6536, GNorm = 0.2155, lr_0 = 5.2398e-04
Loss = 1.5587e-02, PNorm = 150.6894, GNorm = 0.6588, lr_0 = 5.2362e-04
Loss = 1.7901e-02, PNorm = 150.7238, GNorm = 0.3027, lr_0 = 5.2326e-04
Loss = 1.7448e-02, PNorm = 150.7615, GNorm = 0.3261, lr_0 = 5.2290e-04
Loss = 1.7009e-02, PNorm = 150.8057, GNorm = 0.2135, lr_0 = 5.2255e-04
Loss = 1.8460e-02, PNorm = 150.8525, GNorm = 0.3677, lr_0 = 5.2219e-04
Loss = 1.8627e-02, PNorm = 150.9006, GNorm = 0.5218, lr_0 = 5.2183e-04
Loss = 1.8376e-02, PNorm = 150.9483, GNorm = 0.4264, lr_0 = 5.2147e-04
Loss = 1.9250e-02, PNorm = 150.9908, GNorm = 0.7309, lr_0 = 5.2112e-04
Loss = 1.9263e-02, PNorm = 151.0408, GNorm = 0.2794, lr_0 = 5.2076e-04
Loss = 2.0494e-02, PNorm = 151.0824, GNorm = 0.4057, lr_0 = 5.2040e-04
Loss = 1.9667e-02, PNorm = 151.1278, GNorm = 0.4057, lr_0 = 5.2005e-04
Loss = 1.7578e-02, PNorm = 151.1656, GNorm = 0.2289, lr_0 = 5.1969e-04
Loss = 2.0164e-02, PNorm = 151.2126, GNorm = 0.2351, lr_0 = 5.1933e-04
Loss = 1.8918e-02, PNorm = 151.2639, GNorm = 0.4304, lr_0 = 5.1898e-04
Loss = 1.4845e-02, PNorm = 151.3067, GNorm = 0.3506, lr_0 = 5.1862e-04
Loss = 1.5980e-02, PNorm = 151.3494, GNorm = 0.1383, lr_0 = 5.1827e-04
Loss = 1.9402e-02, PNorm = 151.3895, GNorm = 0.9106, lr_0 = 5.1791e-04
Validation mae = 0.283632
Epoch 10
Loss = 1.7354e-02, PNorm = 151.4282, GNorm = 0.4574, lr_0 = 5.1756e-04
Loss = 1.5447e-02, PNorm = 151.4533, GNorm = 0.2521, lr_0 = 5.1720e-04
Loss = 1.7649e-02, PNorm = 151.4816, GNorm = 0.2231, lr_0 = 5.1685e-04
Loss = 1.4188e-02, PNorm = 151.5100, GNorm = 0.1865, lr_0 = 5.1649e-04
Loss = 1.4617e-02, PNorm = 151.5332, GNorm = 0.2499, lr_0 = 5.1614e-04
Loss = 1.6165e-02, PNorm = 151.5586, GNorm = 0.4510, lr_0 = 5.1579e-04
Loss = 1.5248e-02, PNorm = 151.5894, GNorm = 0.2206, lr_0 = 5.1543e-04
Loss = 1.4447e-02, PNorm = 151.6179, GNorm = 0.1599, lr_0 = 5.1508e-04
Loss = 1.5183e-02, PNorm = 151.6444, GNorm = 0.4983, lr_0 = 5.1473e-04
Loss = 1.4367e-02, PNorm = 151.6701, GNorm = 0.4146, lr_0 = 5.1437e-04
Loss = 1.3475e-02, PNorm = 151.6937, GNorm = 0.1676, lr_0 = 5.1402e-04
Loss = 1.1988e-02, PNorm = 151.7199, GNorm = 0.2755, lr_0 = 5.1367e-04
Loss = 1.3635e-02, PNorm = 151.7481, GNorm = 0.3615, lr_0 = 5.1332e-04
Loss = 1.5024e-02, PNorm = 151.7789, GNorm = 0.4218, lr_0 = 5.1297e-04
Loss = 1.5258e-02, PNorm = 151.8016, GNorm = 0.2813, lr_0 = 5.1262e-04
Loss = 1.4891e-02, PNorm = 151.8268, GNorm = 0.2503, lr_0 = 5.1226e-04
Loss = 1.5226e-02, PNorm = 151.8523, GNorm = 0.3193, lr_0 = 5.1191e-04
Loss = 1.4690e-02, PNorm = 151.8825, GNorm = 0.3340, lr_0 = 5.1156e-04
Loss = 1.2573e-02, PNorm = 151.9056, GNorm = 0.2798, lr_0 = 5.1121e-04
Loss = 1.2841e-02, PNorm = 151.9276, GNorm = 0.1487, lr_0 = 5.1086e-04
Loss = 1.4356e-02, PNorm = 151.9505, GNorm = 0.3116, lr_0 = 5.1051e-04
Loss = 1.2659e-02, PNorm = 151.9773, GNorm = 0.2722, lr_0 = 5.1016e-04
Loss = 1.4438e-02, PNorm = 152.0043, GNorm = 0.4544, lr_0 = 5.0981e-04
Loss = 1.3867e-02, PNorm = 152.0345, GNorm = 0.1941, lr_0 = 5.0946e-04
Loss = 1.4387e-02, PNorm = 152.0606, GNorm = 0.4041, lr_0 = 5.0911e-04
Loss = 1.3907e-02, PNorm = 152.0936, GNorm = 0.1609, lr_0 = 5.0877e-04
Loss = 1.3863e-02, PNorm = 152.1202, GNorm = 0.1751, lr_0 = 5.0842e-04
Loss = 1.3447e-02, PNorm = 152.1446, GNorm = 0.1799, lr_0 = 5.0807e-04
Loss = 1.4276e-02, PNorm = 152.1726, GNorm = 0.4190, lr_0 = 5.0772e-04
Loss = 1.4517e-02, PNorm = 152.2003, GNorm = 0.3841, lr_0 = 5.0737e-04
Loss = 1.4492e-02, PNorm = 152.2252, GNorm = 0.3171, lr_0 = 5.0703e-04
Loss = 1.6911e-02, PNorm = 152.2595, GNorm = 0.6661, lr_0 = 5.0668e-04
Loss = 1.4095e-02, PNorm = 152.2949, GNorm = 0.1881, lr_0 = 5.0633e-04
Loss = 1.5145e-02, PNorm = 152.3241, GNorm = 0.2286, lr_0 = 5.0598e-04
Loss = 1.5189e-02, PNorm = 152.3494, GNorm = 0.3806, lr_0 = 5.0564e-04
Loss = 1.4680e-02, PNorm = 152.3783, GNorm = 0.2995, lr_0 = 5.0529e-04
Loss = 1.3095e-02, PNorm = 152.4084, GNorm = 0.3587, lr_0 = 5.0494e-04
Loss = 1.4218e-02, PNorm = 152.4373, GNorm = 0.3606, lr_0 = 5.0460e-04
Loss = 1.3734e-02, PNorm = 152.4689, GNorm = 0.3114, lr_0 = 5.0425e-04
Loss = 1.2179e-02, PNorm = 152.4966, GNorm = 0.2236, lr_0 = 5.0391e-04
Loss = 1.6714e-02, PNorm = 152.5271, GNorm = 0.2374, lr_0 = 5.0356e-04
Loss = 1.6479e-02, PNorm = 152.5573, GNorm = 0.3436, lr_0 = 5.0322e-04
Loss = 1.6107e-02, PNorm = 152.5886, GNorm = 0.3181, lr_0 = 5.0287e-04
Loss = 1.5285e-02, PNorm = 152.6168, GNorm = 0.2639, lr_0 = 5.0253e-04
Loss = 1.4316e-02, PNorm = 152.6434, GNorm = 0.2602, lr_0 = 5.0218e-04
Loss = 1.5263e-02, PNorm = 152.6731, GNorm = 0.4036, lr_0 = 5.0184e-04
Loss = 1.5426e-02, PNorm = 152.7028, GNorm = 0.2467, lr_0 = 5.0150e-04
Loss = 1.4172e-02, PNorm = 152.7412, GNorm = 0.3859, lr_0 = 5.0115e-04
Loss = 1.4610e-02, PNorm = 152.7731, GNorm = 0.6932, lr_0 = 5.0081e-04
Loss = 1.3178e-02, PNorm = 152.8048, GNorm = 0.1566, lr_0 = 5.0047e-04
Loss = 1.3322e-02, PNorm = 152.8363, GNorm = 0.4454, lr_0 = 5.0012e-04
Loss = 1.4570e-02, PNorm = 152.8669, GNorm = 0.7932, lr_0 = 4.9978e-04
Loss = 1.3334e-02, PNorm = 152.8996, GNorm = 0.2230, lr_0 = 4.9944e-04
Loss = 1.3691e-02, PNorm = 152.9272, GNorm = 0.7941, lr_0 = 4.9910e-04
Loss = 1.2532e-02, PNorm = 152.9606, GNorm = 0.3703, lr_0 = 4.9875e-04
Loss = 1.4592e-02, PNorm = 152.9937, GNorm = 0.5647, lr_0 = 4.9841e-04
Loss = 1.5702e-02, PNorm = 153.0240, GNorm = 0.4010, lr_0 = 4.9807e-04
Loss = 1.3267e-02, PNorm = 153.0522, GNorm = 0.6203, lr_0 = 4.9773e-04
Loss = 1.1851e-02, PNorm = 153.0793, GNorm = 0.2124, lr_0 = 4.9739e-04
Loss = 1.3443e-02, PNorm = 153.1089, GNorm = 0.4286, lr_0 = 4.9705e-04
Loss = 1.3919e-02, PNorm = 153.1407, GNorm = 0.1952, lr_0 = 4.9671e-04
Loss = 1.1820e-02, PNorm = 153.1655, GNorm = 0.4678, lr_0 = 4.9637e-04
Loss = 1.7327e-02, PNorm = 153.1887, GNorm = 0.1352, lr_0 = 4.9603e-04
Loss = 1.2622e-02, PNorm = 153.2191, GNorm = 0.4228, lr_0 = 4.9569e-04
Loss = 1.2922e-02, PNorm = 153.2532, GNorm = 0.3777, lr_0 = 4.9535e-04
Loss = 1.2474e-02, PNorm = 153.2844, GNorm = 0.1977, lr_0 = 4.9501e-04
Loss = 1.7785e-02, PNorm = 153.3122, GNorm = 0.3946, lr_0 = 4.9467e-04
Loss = 1.2026e-02, PNorm = 153.3377, GNorm = 0.3096, lr_0 = 4.9433e-04
Loss = 1.2332e-02, PNorm = 153.3650, GNorm = 0.3462, lr_0 = 4.9399e-04
Loss = 1.6909e-02, PNorm = 153.3959, GNorm = 0.4780, lr_0 = 4.9365e-04
Loss = 1.4761e-02, PNorm = 153.4344, GNorm = 0.1295, lr_0 = 4.9332e-04
Loss = 1.8410e-02, PNorm = 153.4692, GNorm = 0.8573, lr_0 = 4.9298e-04
Loss = 1.6386e-02, PNorm = 153.5056, GNorm = 0.3701, lr_0 = 4.9264e-04
Loss = 1.6328e-02, PNorm = 153.5399, GNorm = 0.5102, lr_0 = 4.9230e-04
Loss = 1.4635e-02, PNorm = 153.5687, GNorm = 0.3078, lr_0 = 4.9197e-04
Loss = 1.3889e-02, PNorm = 153.6026, GNorm = 0.3326, lr_0 = 4.9163e-04
Loss = 1.3674e-02, PNorm = 153.6334, GNorm = 0.2431, lr_0 = 4.9129e-04
Loss = 1.7100e-02, PNorm = 153.6644, GNorm = 0.3904, lr_0 = 4.9095e-04
Loss = 1.4839e-02, PNorm = 153.6950, GNorm = 0.3137, lr_0 = 4.9062e-04
Loss = 1.7348e-02, PNorm = 153.7319, GNorm = 0.3586, lr_0 = 4.9028e-04
Loss = 1.3174e-02, PNorm = 153.7675, GNorm = 0.3043, lr_0 = 4.8995e-04
Loss = 1.4109e-02, PNorm = 153.7974, GNorm = 0.1923, lr_0 = 4.8961e-04
Loss = 1.2861e-02, PNorm = 153.8269, GNorm = 0.6076, lr_0 = 4.8928e-04
Loss = 1.4481e-02, PNorm = 153.8575, GNorm = 0.1628, lr_0 = 4.8894e-04
Loss = 1.1819e-02, PNorm = 153.8856, GNorm = 0.1758, lr_0 = 4.8861e-04
Loss = 1.4449e-02, PNorm = 153.9176, GNorm = 0.5243, lr_0 = 4.8827e-04
Loss = 1.5238e-02, PNorm = 153.9490, GNorm = 0.4534, lr_0 = 4.8794e-04
Loss = 1.2945e-02, PNorm = 153.9828, GNorm = 0.3317, lr_0 = 4.8760e-04
Loss = 1.3811e-02, PNorm = 154.0157, GNorm = 0.6304, lr_0 = 4.8727e-04
Loss = 1.7742e-02, PNorm = 154.0473, GNorm = 0.8423, lr_0 = 4.8693e-04
Loss = 1.2698e-02, PNorm = 154.0806, GNorm = 0.1615, lr_0 = 4.8660e-04
Loss = 1.3675e-02, PNorm = 154.1191, GNorm = 0.3331, lr_0 = 4.8627e-04
Loss = 1.9119e-02, PNorm = 154.1478, GNorm = 0.4014, lr_0 = 4.8593e-04
Loss = 1.5977e-02, PNorm = 154.1785, GNorm = 0.6860, lr_0 = 4.8560e-04
Loss = 1.6951e-02, PNorm = 154.2095, GNorm = 0.5618, lr_0 = 4.8527e-04
Loss = 1.3680e-02, PNorm = 154.2466, GNorm = 0.6845, lr_0 = 4.8494e-04
Loss = 1.4173e-02, PNorm = 154.2833, GNorm = 0.6579, lr_0 = 4.8460e-04
Loss = 1.6146e-02, PNorm = 154.3187, GNorm = 0.4179, lr_0 = 4.8427e-04
Loss = 1.4495e-02, PNorm = 154.3521, GNorm = 0.2625, lr_0 = 4.8394e-04
Loss = 1.4445e-02, PNorm = 154.3812, GNorm = 0.1980, lr_0 = 4.8361e-04
Loss = 1.4101e-02, PNorm = 154.4116, GNorm = 0.1922, lr_0 = 4.8328e-04
Loss = 1.4129e-02, PNorm = 154.4407, GNorm = 0.1252, lr_0 = 4.8295e-04
Loss = 1.5150e-02, PNorm = 154.4760, GNorm = 0.3258, lr_0 = 4.8262e-04
Loss = 1.6476e-02, PNorm = 154.5087, GNorm = 0.1227, lr_0 = 4.8228e-04
Loss = 1.7811e-02, PNorm = 154.5412, GNorm = 0.2846, lr_0 = 4.8195e-04
Loss = 1.2476e-02, PNorm = 154.5745, GNorm = 0.1491, lr_0 = 4.8162e-04
Loss = 1.6001e-02, PNorm = 154.6069, GNorm = 0.3843, lr_0 = 4.8129e-04
Loss = 1.2489e-02, PNorm = 154.6405, GNorm = 0.4122, lr_0 = 4.8096e-04
Loss = 1.4352e-02, PNorm = 154.6777, GNorm = 0.1499, lr_0 = 4.8064e-04
Loss = 1.3310e-02, PNorm = 154.7133, GNorm = 0.2238, lr_0 = 4.8031e-04
Loss = 1.5869e-02, PNorm = 154.7451, GNorm = 0.2918, lr_0 = 4.7998e-04
Loss = 1.4603e-02, PNorm = 154.7756, GNorm = 0.1689, lr_0 = 4.7965e-04
Loss = 1.4357e-02, PNorm = 154.8020, GNorm = 0.2586, lr_0 = 4.7932e-04
Loss = 1.5773e-02, PNorm = 154.8335, GNorm = 0.1638, lr_0 = 4.7899e-04
Loss = 1.7998e-02, PNorm = 154.8603, GNorm = 0.4589, lr_0 = 4.7866e-04
Loss = 1.4420e-02, PNorm = 154.8953, GNorm = 0.4170, lr_0 = 4.7833e-04
Loss = 1.5799e-02, PNorm = 154.9326, GNorm = 0.6026, lr_0 = 4.7801e-04
Loss = 1.3658e-02, PNorm = 154.9695, GNorm = 0.2681, lr_0 = 4.7768e-04
Loss = 1.3266e-02, PNorm = 155.0045, GNorm = 0.2710, lr_0 = 4.7735e-04
Loss = 1.3819e-02, PNorm = 155.0385, GNorm = 0.3802, lr_0 = 4.7703e-04
Validation mae = 0.281147
Epoch 11
Loss = 1.1930e-02, PNorm = 155.0634, GNorm = 0.4080, lr_0 = 4.7670e-04
Loss = 1.3098e-02, PNorm = 155.0874, GNorm = 0.2883, lr_0 = 4.7637e-04
Loss = 1.3024e-02, PNorm = 155.1133, GNorm = 0.1291, lr_0 = 4.7605e-04
Loss = 1.4468e-02, PNorm = 155.1383, GNorm = 0.5683, lr_0 = 4.7572e-04
Loss = 1.3054e-02, PNorm = 155.1654, GNorm = 0.2567, lr_0 = 4.7539e-04
Loss = 1.3274e-02, PNorm = 155.1854, GNorm = 0.4090, lr_0 = 4.7507e-04
Loss = 1.2711e-02, PNorm = 155.2087, GNorm = 0.1751, lr_0 = 4.7474e-04
Loss = 1.0902e-02, PNorm = 155.2339, GNorm = 0.4246, lr_0 = 4.7442e-04
Loss = 1.2983e-02, PNorm = 155.2564, GNorm = 0.2261, lr_0 = 4.7409e-04
Loss = 1.2468e-02, PNorm = 155.2789, GNorm = 0.2539, lr_0 = 4.7377e-04
Loss = 1.1119e-02, PNorm = 155.3005, GNorm = 0.3085, lr_0 = 4.7344e-04
Loss = 1.2132e-02, PNorm = 155.3202, GNorm = 0.3946, lr_0 = 4.7312e-04
Loss = 1.4973e-02, PNorm = 155.3444, GNorm = 0.2812, lr_0 = 4.7279e-04
Loss = 1.2969e-02, PNorm = 155.3685, GNorm = 0.2475, lr_0 = 4.7247e-04
Loss = 1.3707e-02, PNorm = 155.3942, GNorm = 0.2670, lr_0 = 4.7215e-04
Loss = 1.3803e-02, PNorm = 155.4147, GNorm = 0.6362, lr_0 = 4.7182e-04
Loss = 1.3852e-02, PNorm = 155.4361, GNorm = 0.1748, lr_0 = 4.7150e-04
Loss = 1.0030e-02, PNorm = 155.4608, GNorm = 0.1856, lr_0 = 4.7118e-04
Loss = 1.1036e-02, PNorm = 155.4834, GNorm = 0.1365, lr_0 = 4.7085e-04
Loss = 1.1434e-02, PNorm = 155.5057, GNorm = 0.4458, lr_0 = 4.7053e-04
Loss = 1.0884e-02, PNorm = 155.5322, GNorm = 0.3356, lr_0 = 4.7021e-04
Loss = 1.0568e-02, PNorm = 155.5592, GNorm = 0.1581, lr_0 = 4.6989e-04
Loss = 1.0051e-02, PNorm = 155.5846, GNorm = 0.2754, lr_0 = 4.6957e-04
Loss = 1.1975e-02, PNorm = 155.6055, GNorm = 0.3929, lr_0 = 4.6924e-04
Loss = 1.1278e-02, PNorm = 155.6271, GNorm = 0.2010, lr_0 = 4.6892e-04
Loss = 1.4213e-02, PNorm = 155.6489, GNorm = 0.3651, lr_0 = 4.6860e-04
Loss = 1.1185e-02, PNorm = 155.6703, GNorm = 0.3828, lr_0 = 4.6828e-04
Loss = 1.2282e-02, PNorm = 155.6955, GNorm = 0.3025, lr_0 = 4.6796e-04
Loss = 1.4405e-02, PNorm = 155.7185, GNorm = 0.2390, lr_0 = 4.6764e-04
Loss = 1.1744e-02, PNorm = 155.7417, GNorm = 0.2996, lr_0 = 4.6732e-04
Loss = 1.3526e-02, PNorm = 155.7688, GNorm = 0.1872, lr_0 = 4.6700e-04
Loss = 1.2328e-02, PNorm = 155.7969, GNorm = 0.4036, lr_0 = 4.6668e-04
Loss = 1.1681e-02, PNorm = 155.8209, GNorm = 0.3579, lr_0 = 4.6636e-04
Loss = 1.2126e-02, PNorm = 155.8449, GNorm = 0.2424, lr_0 = 4.6604e-04
Loss = 1.1218e-02, PNorm = 155.8688, GNorm = 0.3795, lr_0 = 4.6572e-04
Loss = 1.0564e-02, PNorm = 155.8905, GNorm = 0.3117, lr_0 = 4.6540e-04
Loss = 1.0431e-02, PNorm = 155.9146, GNorm = 0.3747, lr_0 = 4.6508e-04
Loss = 1.2641e-02, PNorm = 155.9358, GNorm = 0.2217, lr_0 = 4.6476e-04
Loss = 1.1458e-02, PNorm = 155.9563, GNorm = 0.2477, lr_0 = 4.6445e-04
Loss = 1.5402e-02, PNorm = 155.9761, GNorm = 0.2338, lr_0 = 4.6413e-04
Loss = 1.3165e-02, PNorm = 156.0011, GNorm = 0.3082, lr_0 = 4.6381e-04
Loss = 1.1575e-02, PNorm = 156.0278, GNorm = 0.1073, lr_0 = 4.6349e-04
Loss = 1.0415e-02, PNorm = 156.0503, GNorm = 0.1712, lr_0 = 4.6317e-04
Loss = 9.0212e-03, PNorm = 156.0738, GNorm = 0.1862, lr_0 = 4.6286e-04
Loss = 1.2252e-02, PNorm = 156.0979, GNorm = 0.2830, lr_0 = 4.6254e-04
Loss = 1.1574e-02, PNorm = 156.1211, GNorm = 0.1703, lr_0 = 4.6222e-04
Loss = 1.0211e-02, PNorm = 156.1476, GNorm = 0.4592, lr_0 = 4.6191e-04
Loss = 1.1226e-02, PNorm = 156.1712, GNorm = 0.7607, lr_0 = 4.6159e-04
Loss = 8.5071e-03, PNorm = 156.1949, GNorm = 0.1956, lr_0 = 4.6127e-04
Loss = 9.9321e-03, PNorm = 156.2162, GNorm = 0.3393, lr_0 = 4.6096e-04
Loss = 1.2768e-02, PNorm = 156.2423, GNorm = 0.8590, lr_0 = 4.6064e-04
Loss = 1.7152e-02, PNorm = 156.2650, GNorm = 0.1940, lr_0 = 4.6033e-04
Loss = 1.0014e-02, PNorm = 156.2927, GNorm = 0.2009, lr_0 = 4.6001e-04
Loss = 1.1476e-02, PNorm = 156.3221, GNorm = 0.3437, lr_0 = 4.5970e-04
Loss = 1.2857e-02, PNorm = 156.3516, GNorm = 0.3919, lr_0 = 4.5938e-04
Loss = 1.1590e-02, PNorm = 156.3807, GNorm = 0.3059, lr_0 = 4.5907e-04
Loss = 1.1391e-02, PNorm = 156.4065, GNorm = 0.3085, lr_0 = 4.5875e-04
Loss = 1.1345e-02, PNorm = 156.4305, GNorm = 0.3124, lr_0 = 4.5844e-04
Loss = 1.0123e-02, PNorm = 156.4509, GNorm = 0.1891, lr_0 = 4.5812e-04
Loss = 1.2033e-02, PNorm = 156.4716, GNorm = 0.4660, lr_0 = 4.5781e-04
Loss = 1.0249e-02, PNorm = 156.4949, GNorm = 0.2965, lr_0 = 4.5750e-04
Loss = 1.1521e-02, PNorm = 156.5170, GNorm = 0.2835, lr_0 = 4.5718e-04
Loss = 1.1078e-02, PNorm = 156.5418, GNorm = 0.5674, lr_0 = 4.5687e-04
Loss = 1.1188e-02, PNorm = 156.5678, GNorm = 0.2899, lr_0 = 4.5656e-04
Loss = 1.0886e-02, PNorm = 156.5941, GNorm = 0.1489, lr_0 = 4.5624e-04
Loss = 1.2665e-02, PNorm = 156.6235, GNorm = 0.2251, lr_0 = 4.5593e-04
Loss = 1.2372e-02, PNorm = 156.6478, GNorm = 0.4299, lr_0 = 4.5562e-04
Loss = 1.1115e-02, PNorm = 156.6754, GNorm = 0.2079, lr_0 = 4.5531e-04
Loss = 1.1857e-02, PNorm = 156.7041, GNorm = 0.3510, lr_0 = 4.5499e-04
Loss = 1.0422e-02, PNorm = 156.7296, GNorm = 0.1371, lr_0 = 4.5468e-04
Loss = 9.7952e-03, PNorm = 156.7525, GNorm = 0.3079, lr_0 = 4.5437e-04
Loss = 1.3059e-02, PNorm = 156.7754, GNorm = 0.2376, lr_0 = 4.5406e-04
Loss = 1.3663e-02, PNorm = 156.8036, GNorm = 0.2417, lr_0 = 4.5375e-04
Loss = 1.1085e-02, PNorm = 156.8316, GNorm = 0.4356, lr_0 = 4.5344e-04
Loss = 1.0509e-02, PNorm = 156.8573, GNorm = 0.2196, lr_0 = 4.5313e-04
Loss = 1.3454e-02, PNorm = 156.8851, GNorm = 0.4229, lr_0 = 4.5282e-04
Loss = 1.1032e-02, PNorm = 156.9071, GNorm = 0.2778, lr_0 = 4.5251e-04
Loss = 1.0449e-02, PNorm = 156.9358, GNorm = 0.1516, lr_0 = 4.5220e-04
Loss = 1.3933e-02, PNorm = 156.9609, GNorm = 0.6506, lr_0 = 4.5189e-04
Loss = 1.1290e-02, PNorm = 156.9885, GNorm = 0.2759, lr_0 = 4.5158e-04
Loss = 1.1082e-02, PNorm = 157.0185, GNorm = 0.3818, lr_0 = 4.5127e-04
Loss = 1.2114e-02, PNorm = 157.0477, GNorm = 0.2419, lr_0 = 4.5096e-04
Loss = 1.3482e-02, PNorm = 157.0755, GNorm = 0.8138, lr_0 = 4.5065e-04
Loss = 1.5335e-02, PNorm = 157.1037, GNorm = 0.4304, lr_0 = 4.5034e-04
Loss = 1.0914e-02, PNorm = 157.1309, GNorm = 0.2870, lr_0 = 4.5003e-04
Loss = 1.0030e-02, PNorm = 157.1581, GNorm = 0.1532, lr_0 = 4.4972e-04
Loss = 1.1228e-02, PNorm = 157.1830, GNorm = 0.1444, lr_0 = 4.4942e-04
Loss = 1.2223e-02, PNorm = 157.2045, GNorm = 0.4159, lr_0 = 4.4911e-04
Loss = 1.0436e-02, PNorm = 157.2314, GNorm = 0.8497, lr_0 = 4.4880e-04
Loss = 1.0788e-02, PNorm = 157.2635, GNorm = 0.3101, lr_0 = 4.4849e-04
Loss = 1.0204e-02, PNorm = 157.2918, GNorm = 0.4894, lr_0 = 4.4819e-04
Loss = 1.0522e-02, PNorm = 157.3192, GNorm = 0.2242, lr_0 = 4.4788e-04
Loss = 1.7411e-02, PNorm = 157.3493, GNorm = 0.4604, lr_0 = 4.4757e-04
Loss = 9.9402e-03, PNorm = 157.3754, GNorm = 0.2352, lr_0 = 4.4727e-04
Loss = 1.0089e-02, PNorm = 157.4052, GNorm = 0.5976, lr_0 = 4.4696e-04
Loss = 1.1350e-02, PNorm = 157.4311, GNorm = 0.4470, lr_0 = 4.4665e-04
Loss = 1.2928e-02, PNorm = 157.4557, GNorm = 0.2484, lr_0 = 4.4635e-04
Loss = 1.4953e-02, PNorm = 157.4796, GNorm = 0.3667, lr_0 = 4.4604e-04
Loss = 1.5248e-02, PNorm = 157.5075, GNorm = 0.4924, lr_0 = 4.4574e-04
Loss = 1.5680e-02, PNorm = 157.5297, GNorm = 0.4253, lr_0 = 4.4543e-04
Loss = 1.7178e-02, PNorm = 157.5610, GNorm = 0.1647, lr_0 = 4.4513e-04
Loss = 1.2117e-02, PNorm = 157.5929, GNorm = 0.6137, lr_0 = 4.4482e-04
Loss = 1.2777e-02, PNorm = 157.6219, GNorm = 0.0912, lr_0 = 4.4452e-04
Loss = 1.0589e-02, PNorm = 157.6505, GNorm = 0.2669, lr_0 = 4.4421e-04
Loss = 1.0558e-02, PNorm = 157.6793, GNorm = 0.4653, lr_0 = 4.4391e-04
Loss = 1.1503e-02, PNorm = 157.7105, GNorm = 0.1259, lr_0 = 4.4360e-04
Loss = 1.0489e-02, PNorm = 157.7399, GNorm = 0.4205, lr_0 = 4.4330e-04
Loss = 1.7523e-02, PNorm = 157.7683, GNorm = 0.4124, lr_0 = 4.4299e-04
Loss = 1.2130e-02, PNorm = 157.7929, GNorm = 0.1602, lr_0 = 4.4269e-04
Loss = 1.3028e-02, PNorm = 157.8232, GNorm = 0.4215, lr_0 = 4.4239e-04
Loss = 1.3396e-02, PNorm = 157.8561, GNorm = 0.2372, lr_0 = 4.4209e-04
Loss = 1.1290e-02, PNorm = 157.8887, GNorm = 0.2057, lr_0 = 4.4178e-04
Loss = 1.4230e-02, PNorm = 157.9193, GNorm = 0.3422, lr_0 = 4.4148e-04
Loss = 1.3375e-02, PNorm = 157.9461, GNorm = 0.5123, lr_0 = 4.4118e-04
Loss = 1.3087e-02, PNorm = 157.9746, GNorm = 0.3825, lr_0 = 4.4088e-04
Loss = 1.2306e-02, PNorm = 158.0034, GNorm = 0.1470, lr_0 = 4.4057e-04
Loss = 1.2707e-02, PNorm = 158.0361, GNorm = 0.1180, lr_0 = 4.4027e-04
Loss = 1.2753e-02, PNorm = 158.0671, GNorm = 0.2720, lr_0 = 4.3997e-04
Loss = 1.2572e-02, PNorm = 158.0956, GNorm = 0.4161, lr_0 = 4.3967e-04
Loss = 1.4675e-02, PNorm = 158.1288, GNorm = 0.3444, lr_0 = 4.3937e-04
Validation mae = 0.281007
Epoch 12
Loss = 1.0632e-02, PNorm = 158.1450, GNorm = 0.4685, lr_0 = 4.3907e-04
Loss = 1.2427e-02, PNorm = 158.1663, GNorm = 0.3312, lr_0 = 4.3877e-04
Loss = 1.1494e-02, PNorm = 158.1859, GNorm = 0.4953, lr_0 = 4.3846e-04
Loss = 1.0470e-02, PNorm = 158.2068, GNorm = 0.2525, lr_0 = 4.3816e-04
Loss = 1.0215e-02, PNorm = 158.2279, GNorm = 0.5844, lr_0 = 4.3786e-04
Loss = 9.4690e-03, PNorm = 158.2450, GNorm = 0.1089, lr_0 = 4.3756e-04
Loss = 1.1142e-02, PNorm = 158.2642, GNorm = 0.2177, lr_0 = 4.3726e-04
Loss = 9.1407e-03, PNorm = 158.2827, GNorm = 0.1928, lr_0 = 4.3696e-04
Loss = 1.1342e-02, PNorm = 158.3044, GNorm = 0.4638, lr_0 = 4.3667e-04
Loss = 9.5271e-03, PNorm = 158.3274, GNorm = 0.1095, lr_0 = 4.3637e-04
Loss = 1.0351e-02, PNorm = 158.3506, GNorm = 0.2164, lr_0 = 4.3607e-04
Loss = 8.4280e-03, PNorm = 158.3729, GNorm = 0.2401, lr_0 = 4.3577e-04
Loss = 8.6669e-03, PNorm = 158.3889, GNorm = 0.2789, lr_0 = 4.3547e-04
Loss = 1.2079e-02, PNorm = 158.4059, GNorm = 0.3147, lr_0 = 4.3517e-04
Loss = 1.0417e-02, PNorm = 158.4258, GNorm = 0.5743, lr_0 = 4.3487e-04
Loss = 8.4077e-03, PNorm = 158.4439, GNorm = 0.2857, lr_0 = 4.3458e-04
Loss = 9.7518e-03, PNorm = 158.4659, GNorm = 0.3685, lr_0 = 4.3428e-04
Loss = 9.9641e-03, PNorm = 158.4874, GNorm = 0.2348, lr_0 = 4.3398e-04
Loss = 8.9143e-03, PNorm = 158.5069, GNorm = 0.2795, lr_0 = 4.3368e-04
Loss = 1.0232e-02, PNorm = 158.5275, GNorm = 0.2883, lr_0 = 4.3339e-04
Loss = 9.7731e-03, PNorm = 158.5443, GNorm = 0.2747, lr_0 = 4.3309e-04
Loss = 1.1295e-02, PNorm = 158.5639, GNorm = 0.5912, lr_0 = 4.3279e-04
Loss = 1.0258e-02, PNorm = 158.5848, GNorm = 0.3642, lr_0 = 4.3250e-04
Loss = 9.4569e-03, PNorm = 158.6038, GNorm = 0.1975, lr_0 = 4.3220e-04
Loss = 1.0209e-02, PNorm = 158.6249, GNorm = 0.1637, lr_0 = 4.3190e-04
Loss = 9.0605e-03, PNorm = 158.6421, GNorm = 0.1902, lr_0 = 4.3161e-04
Loss = 9.1046e-03, PNorm = 158.6619, GNorm = 0.2024, lr_0 = 4.3131e-04
Loss = 1.0812e-02, PNorm = 158.6761, GNorm = 0.4902, lr_0 = 4.3102e-04
Loss = 8.9156e-03, PNorm = 158.6925, GNorm = 0.2433, lr_0 = 4.3072e-04
Loss = 1.0454e-02, PNorm = 158.7162, GNorm = 0.1668, lr_0 = 4.3043e-04
Loss = 8.4496e-03, PNorm = 158.7371, GNorm = 0.3128, lr_0 = 4.3013e-04
Loss = 1.1510e-02, PNorm = 158.7563, GNorm = 0.1045, lr_0 = 4.2984e-04
Loss = 8.3149e-03, PNorm = 158.7769, GNorm = 0.1987, lr_0 = 4.2954e-04
Loss = 1.3735e-02, PNorm = 158.7996, GNorm = 0.1786, lr_0 = 4.2925e-04
Loss = 8.8239e-03, PNorm = 158.8229, GNorm = 0.3021, lr_0 = 4.2895e-04
Loss = 1.1619e-02, PNorm = 158.8412, GNorm = 0.1997, lr_0 = 4.2866e-04
Loss = 9.5368e-03, PNorm = 158.8635, GNorm = 0.2313, lr_0 = 4.2837e-04
Loss = 1.0614e-02, PNorm = 158.8852, GNorm = 0.2330, lr_0 = 4.2807e-04
Loss = 1.1532e-02, PNorm = 158.9088, GNorm = 0.1108, lr_0 = 4.2778e-04
Loss = 9.5782e-03, PNorm = 158.9248, GNorm = 0.3653, lr_0 = 4.2749e-04
Loss = 8.6980e-03, PNorm = 158.9402, GNorm = 0.1304, lr_0 = 4.2719e-04
Loss = 1.0818e-02, PNorm = 158.9582, GNorm = 0.4453, lr_0 = 4.2690e-04
Loss = 1.3135e-02, PNorm = 158.9808, GNorm = 0.7379, lr_0 = 4.2661e-04
Loss = 9.5855e-03, PNorm = 159.0004, GNorm = 0.1405, lr_0 = 4.2632e-04
Loss = 7.6071e-03, PNorm = 159.0218, GNorm = 0.2520, lr_0 = 4.2602e-04
Loss = 1.1393e-02, PNorm = 159.0387, GNorm = 0.2661, lr_0 = 4.2573e-04
Loss = 1.1728e-02, PNorm = 159.0600, GNorm = 0.2761, lr_0 = 4.2544e-04
Loss = 1.0580e-02, PNorm = 159.0813, GNorm = 0.6275, lr_0 = 4.2515e-04
Loss = 9.6607e-03, PNorm = 159.1018, GNorm = 0.4629, lr_0 = 4.2486e-04
Loss = 1.1772e-02, PNorm = 159.1238, GNorm = 0.1956, lr_0 = 4.2457e-04
Loss = 9.8511e-03, PNorm = 159.1462, GNorm = 0.3690, lr_0 = 4.2428e-04
Loss = 1.1374e-02, PNorm = 159.1699, GNorm = 0.4338, lr_0 = 4.2399e-04
Loss = 1.1775e-02, PNorm = 159.1883, GNorm = 0.1020, lr_0 = 4.2370e-04
Loss = 8.5242e-03, PNorm = 159.2069, GNorm = 0.2791, lr_0 = 4.2340e-04
Loss = 1.0354e-02, PNorm = 159.2299, GNorm = 0.5561, lr_0 = 4.2311e-04
Loss = 8.8421e-03, PNorm = 159.2540, GNorm = 0.2602, lr_0 = 4.2283e-04
Loss = 9.7797e-03, PNorm = 159.2766, GNorm = 0.4564, lr_0 = 4.2254e-04
Loss = 1.1047e-02, PNorm = 159.2976, GNorm = 0.1534, lr_0 = 4.2225e-04
Loss = 8.8302e-03, PNorm = 159.3183, GNorm = 0.2156, lr_0 = 4.2196e-04
Loss = 8.4854e-03, PNorm = 159.3390, GNorm = 0.2467, lr_0 = 4.2167e-04
Loss = 8.8494e-03, PNorm = 159.3597, GNorm = 0.2584, lr_0 = 4.2138e-04
Loss = 1.0918e-02, PNorm = 159.3816, GNorm = 0.8130, lr_0 = 4.2109e-04
Loss = 8.2621e-03, PNorm = 159.4029, GNorm = 0.1337, lr_0 = 4.2080e-04
Loss = 1.0086e-02, PNorm = 159.4234, GNorm = 0.2090, lr_0 = 4.2051e-04
Loss = 9.8571e-03, PNorm = 159.4463, GNorm = 0.4589, lr_0 = 4.2023e-04
Loss = 7.7122e-03, PNorm = 159.4700, GNorm = 0.1075, lr_0 = 4.1994e-04
Loss = 9.8204e-03, PNorm = 159.4938, GNorm = 0.1890, lr_0 = 4.1965e-04
Loss = 1.2517e-02, PNorm = 159.5181, GNorm = 0.1969, lr_0 = 4.1936e-04
Loss = 8.5583e-03, PNorm = 159.5379, GNorm = 0.1921, lr_0 = 4.1907e-04
Loss = 8.6389e-03, PNorm = 159.5554, GNorm = 0.9556, lr_0 = 4.1879e-04
Loss = 1.0041e-02, PNorm = 159.5698, GNorm = 0.1732, lr_0 = 4.1850e-04
Loss = 1.1175e-02, PNorm = 159.5909, GNorm = 0.1058, lr_0 = 4.1821e-04
Loss = 8.1588e-03, PNorm = 159.6118, GNorm = 0.2636, lr_0 = 4.1793e-04
Loss = 1.0738e-02, PNorm = 159.6312, GNorm = 0.1714, lr_0 = 4.1764e-04
Loss = 1.0565e-02, PNorm = 159.6521, GNorm = 0.3657, lr_0 = 4.1736e-04
Loss = 1.1123e-02, PNorm = 159.6723, GNorm = 0.1097, lr_0 = 4.1707e-04
Loss = 9.6044e-03, PNorm = 159.6937, GNorm = 0.2420, lr_0 = 4.1678e-04
Loss = 9.9157e-03, PNorm = 159.7167, GNorm = 0.1114, lr_0 = 4.1650e-04
Loss = 9.3060e-03, PNorm = 159.7378, GNorm = 0.4467, lr_0 = 4.1621e-04
Loss = 9.8179e-03, PNorm = 159.7614, GNorm = 0.3587, lr_0 = 4.1593e-04
Loss = 1.3299e-02, PNorm = 159.7835, GNorm = 0.5447, lr_0 = 4.1564e-04
Loss = 1.0194e-02, PNorm = 159.8040, GNorm = 0.5810, lr_0 = 4.1536e-04
Loss = 1.1586e-02, PNorm = 159.8230, GNorm = 0.3451, lr_0 = 4.1507e-04
Loss = 8.0704e-03, PNorm = 159.8437, GNorm = 0.2393, lr_0 = 4.1479e-04
Loss = 8.5506e-03, PNorm = 159.8626, GNorm = 0.4703, lr_0 = 4.1450e-04
Loss = 9.4323e-03, PNorm = 159.8824, GNorm = 0.2377, lr_0 = 4.1422e-04
Loss = 8.0918e-03, PNorm = 159.9010, GNorm = 0.1572, lr_0 = 4.1394e-04
Loss = 7.7722e-03, PNorm = 159.9202, GNorm = 0.1861, lr_0 = 4.1365e-04
Loss = 8.5991e-03, PNorm = 159.9424, GNorm = 0.2463, lr_0 = 4.1337e-04
Loss = 1.2824e-02, PNorm = 159.9628, GNorm = 0.4878, lr_0 = 4.1309e-04
Loss = 1.1944e-02, PNorm = 159.9865, GNorm = 0.3038, lr_0 = 4.1280e-04
Loss = 1.0524e-02, PNorm = 160.0075, GNorm = 0.5136, lr_0 = 4.1252e-04
Loss = 9.6881e-03, PNorm = 160.0321, GNorm = 0.5848, lr_0 = 4.1224e-04
Loss = 1.2012e-02, PNorm = 160.0563, GNorm = 0.5438, lr_0 = 4.1196e-04
Loss = 1.1606e-02, PNorm = 160.0827, GNorm = 0.5652, lr_0 = 4.1167e-04
Loss = 1.0279e-02, PNorm = 160.1130, GNorm = 0.1426, lr_0 = 4.1139e-04
Loss = 1.0126e-02, PNorm = 160.1432, GNorm = 0.0996, lr_0 = 4.1111e-04
Loss = 1.1015e-02, PNorm = 160.1670, GNorm = 0.5356, lr_0 = 4.1083e-04
Loss = 1.0058e-02, PNorm = 160.1896, GNorm = 0.5209, lr_0 = 4.1055e-04
Loss = 1.0147e-02, PNorm = 160.2141, GNorm = 0.2752, lr_0 = 4.1027e-04
Loss = 1.2226e-02, PNorm = 160.2382, GNorm = 0.2064, lr_0 = 4.0998e-04
Loss = 1.2399e-02, PNorm = 160.2637, GNorm = 0.3601, lr_0 = 4.0970e-04
Loss = 1.4188e-02, PNorm = 160.2880, GNorm = 0.1188, lr_0 = 4.0942e-04
Loss = 1.0236e-02, PNorm = 160.3100, GNorm = 0.1210, lr_0 = 4.0914e-04
Loss = 9.3102e-03, PNorm = 160.3317, GNorm = 0.1619, lr_0 = 4.0886e-04
Loss = 1.1772e-02, PNorm = 160.3524, GNorm = 0.2328, lr_0 = 4.0858e-04
Loss = 8.5249e-03, PNorm = 160.3774, GNorm = 0.4471, lr_0 = 4.0830e-04
Loss = 9.2323e-03, PNorm = 160.3996, GNorm = 0.2991, lr_0 = 4.0802e-04
Loss = 9.8544e-03, PNorm = 160.4189, GNorm = 0.4281, lr_0 = 4.0774e-04
Loss = 9.0058e-03, PNorm = 160.4387, GNorm = 0.2011, lr_0 = 4.0746e-04
Loss = 1.0835e-02, PNorm = 160.4623, GNorm = 0.2341, lr_0 = 4.0718e-04
Loss = 9.5956e-03, PNorm = 160.4853, GNorm = 0.2742, lr_0 = 4.0691e-04
Loss = 1.0477e-02, PNorm = 160.5110, GNorm = 0.3726, lr_0 = 4.0663e-04
Loss = 1.1255e-02, PNorm = 160.5324, GNorm = 0.2854, lr_0 = 4.0635e-04
Loss = 9.1908e-03, PNorm = 160.5510, GNorm = 0.5425, lr_0 = 4.0607e-04
Loss = 9.4091e-03, PNorm = 160.5720, GNorm = 0.1154, lr_0 = 4.0579e-04
Loss = 9.3711e-03, PNorm = 160.5951, GNorm = 0.5618, lr_0 = 4.0551e-04
Loss = 9.9142e-03, PNorm = 160.6215, GNorm = 0.3308, lr_0 = 4.0524e-04
Loss = 1.0186e-02, PNorm = 160.6441, GNorm = 0.4589, lr_0 = 4.0496e-04
Loss = 8.3125e-03, PNorm = 160.6684, GNorm = 0.5651, lr_0 = 4.0468e-04
Validation mae = 0.281907
Epoch 13
Loss = 1.0291e-02, PNorm = 160.6912, GNorm = 0.7277, lr_0 = 4.0440e-04
Loss = 9.8780e-03, PNorm = 160.7042, GNorm = 0.2547, lr_0 = 4.0413e-04
Loss = 7.0181e-03, PNorm = 160.7163, GNorm = 0.2642, lr_0 = 4.0385e-04
Loss = 1.0005e-02, PNorm = 160.7319, GNorm = 0.4017, lr_0 = 4.0357e-04
Loss = 8.5877e-03, PNorm = 160.7474, GNorm = 0.2429, lr_0 = 4.0330e-04
Loss = 9.4190e-03, PNorm = 160.7674, GNorm = 0.1402, lr_0 = 4.0302e-04
Loss = 9.1386e-03, PNorm = 160.7880, GNorm = 0.2131, lr_0 = 4.0274e-04
Loss = 8.2059e-03, PNorm = 160.8053, GNorm = 0.2550, lr_0 = 4.0247e-04
Loss = 8.4037e-03, PNorm = 160.8244, GNorm = 0.2201, lr_0 = 4.0219e-04
Loss = 8.3249e-03, PNorm = 160.8404, GNorm = 0.2972, lr_0 = 4.0192e-04
Loss = 8.7065e-03, PNorm = 160.8590, GNorm = 0.1817, lr_0 = 4.0164e-04
Loss = 7.2833e-03, PNorm = 160.8776, GNorm = 0.3581, lr_0 = 4.0137e-04
Loss = 8.1899e-03, PNorm = 160.8902, GNorm = 0.1672, lr_0 = 4.0109e-04
Loss = 7.7218e-03, PNorm = 160.9068, GNorm = 0.1204, lr_0 = 4.0082e-04
Loss = 8.3872e-03, PNorm = 160.9194, GNorm = 0.0910, lr_0 = 4.0054e-04
Loss = 7.0809e-03, PNorm = 160.9353, GNorm = 0.4566, lr_0 = 4.0027e-04
Loss = 8.6846e-03, PNorm = 160.9501, GNorm = 0.2028, lr_0 = 3.9999e-04
Loss = 8.1484e-03, PNorm = 160.9646, GNorm = 0.3283, lr_0 = 3.9972e-04
Loss = 7.6464e-03, PNorm = 160.9796, GNorm = 0.2553, lr_0 = 3.9945e-04
Loss = 9.5630e-03, PNorm = 160.9964, GNorm = 0.3370, lr_0 = 3.9917e-04
Loss = 1.0396e-02, PNorm = 161.0167, GNorm = 0.2913, lr_0 = 3.9890e-04
Loss = 9.6118e-03, PNorm = 161.0320, GNorm = 0.3917, lr_0 = 3.9863e-04
Loss = 7.6069e-03, PNorm = 161.0499, GNorm = 0.1472, lr_0 = 3.9835e-04
Loss = 9.6775e-03, PNorm = 161.0667, GNorm = 0.1435, lr_0 = 3.9808e-04
Loss = 9.3120e-03, PNorm = 161.0834, GNorm = 0.1777, lr_0 = 3.9781e-04
Loss = 8.3208e-03, PNorm = 161.1007, GNorm = 0.2414, lr_0 = 3.9753e-04
Loss = 8.7210e-03, PNorm = 161.1178, GNorm = 0.2147, lr_0 = 3.9726e-04
Loss = 9.4648e-03, PNorm = 161.1351, GNorm = 0.3059, lr_0 = 3.9699e-04
Loss = 8.7975e-03, PNorm = 161.1505, GNorm = 0.2693, lr_0 = 3.9672e-04
Loss = 9.6554e-03, PNorm = 161.1679, GNorm = 0.2660, lr_0 = 3.9645e-04
Loss = 1.1906e-02, PNorm = 161.1834, GNorm = 0.4662, lr_0 = 3.9617e-04
Loss = 9.6836e-03, PNorm = 161.1957, GNorm = 0.2173, lr_0 = 3.9590e-04
Loss = 8.5992e-03, PNorm = 161.2128, GNorm = 0.4501, lr_0 = 3.9563e-04
Loss = 7.9139e-03, PNorm = 161.2328, GNorm = 0.4838, lr_0 = 3.9536e-04
Loss = 8.4569e-03, PNorm = 161.2511, GNorm = 0.4186, lr_0 = 3.9509e-04
Loss = 9.4619e-03, PNorm = 161.2683, GNorm = 0.3828, lr_0 = 3.9482e-04
Loss = 7.7289e-03, PNorm = 161.2844, GNorm = 0.1814, lr_0 = 3.9455e-04
Loss = 7.5862e-03, PNorm = 161.3004, GNorm = 0.1655, lr_0 = 3.9428e-04
Loss = 8.1456e-03, PNorm = 161.3179, GNorm = 0.1270, lr_0 = 3.9401e-04
Loss = 1.4083e-02, PNorm = 161.3373, GNorm = 0.3866, lr_0 = 3.9374e-04
Loss = 8.4966e-03, PNorm = 161.3536, GNorm = 0.2359, lr_0 = 3.9347e-04
Loss = 1.1299e-02, PNorm = 161.3703, GNorm = 0.6116, lr_0 = 3.9320e-04
Loss = 9.0700e-03, PNorm = 161.3912, GNorm = 0.1360, lr_0 = 3.9293e-04
Loss = 7.4035e-03, PNorm = 161.4126, GNorm = 0.3563, lr_0 = 3.9266e-04
Loss = 8.3963e-03, PNorm = 161.4286, GNorm = 0.1034, lr_0 = 3.9239e-04
Loss = 9.1937e-03, PNorm = 161.4472, GNorm = 0.2911, lr_0 = 3.9212e-04
Loss = 7.7321e-03, PNorm = 161.4690, GNorm = 0.1285, lr_0 = 3.9185e-04
Loss = 7.3146e-03, PNorm = 161.4895, GNorm = 0.3649, lr_0 = 3.9159e-04
Loss = 7.9788e-03, PNorm = 161.5075, GNorm = 0.5100, lr_0 = 3.9132e-04
Loss = 8.1772e-03, PNorm = 161.5266, GNorm = 0.2930, lr_0 = 3.9105e-04
Loss = 8.7315e-03, PNorm = 161.5440, GNorm = 0.3156, lr_0 = 3.9078e-04
Loss = 7.4644e-03, PNorm = 161.5633, GNorm = 0.2858, lr_0 = 3.9051e-04
Loss = 9.2166e-03, PNorm = 161.5811, GNorm = 0.1287, lr_0 = 3.9025e-04
Loss = 8.2856e-03, PNorm = 161.5988, GNorm = 0.2505, lr_0 = 3.8998e-04
Loss = 7.1083e-03, PNorm = 161.6157, GNorm = 0.0729, lr_0 = 3.8971e-04
Loss = 7.8706e-03, PNorm = 161.6333, GNorm = 0.1040, lr_0 = 3.8945e-04
Loss = 9.4855e-03, PNorm = 161.6502, GNorm = 0.2289, lr_0 = 3.8918e-04
Loss = 8.0860e-03, PNorm = 161.6707, GNorm = 0.1835, lr_0 = 3.8891e-04
Loss = 8.2912e-03, PNorm = 161.6913, GNorm = 0.1740, lr_0 = 3.8865e-04
Loss = 7.7415e-03, PNorm = 161.7097, GNorm = 0.1574, lr_0 = 3.8838e-04
Loss = 8.1837e-03, PNorm = 161.7303, GNorm = 0.4128, lr_0 = 3.8811e-04
Loss = 7.0389e-03, PNorm = 161.7505, GNorm = 0.2488, lr_0 = 3.8785e-04
Loss = 7.6344e-03, PNorm = 161.7668, GNorm = 0.1646, lr_0 = 3.8758e-04
Loss = 8.5669e-03, PNorm = 161.7832, GNorm = 0.2608, lr_0 = 3.8732e-04
Loss = 7.3647e-03, PNorm = 161.8021, GNorm = 0.2976, lr_0 = 3.8705e-04
Loss = 8.5795e-03, PNorm = 161.8183, GNorm = 0.1127, lr_0 = 3.8679e-04
Loss = 7.4646e-03, PNorm = 161.8346, GNorm = 0.2067, lr_0 = 3.8652e-04
Loss = 1.2645e-02, PNorm = 161.8526, GNorm = 0.5845, lr_0 = 3.8626e-04
Loss = 1.0044e-02, PNorm = 161.8696, GNorm = 0.3852, lr_0 = 3.8599e-04
Loss = 9.4320e-03, PNorm = 161.8904, GNorm = 0.3089, lr_0 = 3.8573e-04
Loss = 8.9137e-03, PNorm = 161.9050, GNorm = 0.6510, lr_0 = 3.8546e-04
Loss = 1.1149e-02, PNorm = 161.9196, GNorm = 0.1137, lr_0 = 3.8520e-04
Loss = 6.8358e-03, PNorm = 161.9396, GNorm = 0.1280, lr_0 = 3.8493e-04
Loss = 9.5882e-03, PNorm = 161.9589, GNorm = 0.1545, lr_0 = 3.8467e-04
Loss = 8.5511e-03, PNorm = 161.9765, GNorm = 0.2483, lr_0 = 3.8441e-04
Loss = 8.2982e-03, PNorm = 161.9912, GNorm = 0.1539, lr_0 = 3.8414e-04
Loss = 9.5430e-03, PNorm = 162.0070, GNorm = 0.1406, lr_0 = 3.8388e-04
Loss = 9.1025e-03, PNorm = 162.0223, GNorm = 0.2027, lr_0 = 3.8362e-04
Loss = 6.8207e-03, PNorm = 162.0388, GNorm = 0.1658, lr_0 = 3.8336e-04
Loss = 8.2860e-03, PNorm = 162.0573, GNorm = 0.4755, lr_0 = 3.8309e-04
Loss = 6.6825e-03, PNorm = 162.0779, GNorm = 0.1984, lr_0 = 3.8283e-04
Loss = 7.8618e-03, PNorm = 162.0979, GNorm = 0.4810, lr_0 = 3.8257e-04
Loss = 9.1266e-03, PNorm = 162.1180, GNorm = 0.5132, lr_0 = 3.8231e-04
Loss = 7.9817e-03, PNorm = 162.1338, GNorm = 0.1557, lr_0 = 3.8204e-04
Loss = 8.8567e-03, PNorm = 162.1517, GNorm = 0.1834, lr_0 = 3.8178e-04
Loss = 6.8110e-03, PNorm = 162.1682, GNorm = 0.1383, lr_0 = 3.8152e-04
Loss = 7.3477e-03, PNorm = 162.1847, GNorm = 0.0957, lr_0 = 3.8126e-04
Loss = 7.9930e-03, PNorm = 162.2035, GNorm = 0.2780, lr_0 = 3.8100e-04
Loss = 8.3960e-03, PNorm = 162.2227, GNorm = 0.1317, lr_0 = 3.8074e-04
Loss = 8.1317e-03, PNorm = 162.2375, GNorm = 0.2312, lr_0 = 3.8048e-04
Loss = 7.2704e-03, PNorm = 162.2532, GNorm = 0.2555, lr_0 = 3.8022e-04
Loss = 6.8897e-03, PNorm = 162.2715, GNorm = 0.1511, lr_0 = 3.7995e-04
Loss = 7.4788e-03, PNorm = 162.2926, GNorm = 0.3709, lr_0 = 3.7969e-04
Loss = 9.6657e-03, PNorm = 162.3055, GNorm = 0.4578, lr_0 = 3.7943e-04
Loss = 9.1753e-03, PNorm = 162.3205, GNorm = 0.2365, lr_0 = 3.7917e-04
Loss = 7.0537e-03, PNorm = 162.3380, GNorm = 0.4498, lr_0 = 3.7891e-04
Loss = 7.3120e-03, PNorm = 162.3543, GNorm = 0.2020, lr_0 = 3.7866e-04
Loss = 8.3218e-03, PNorm = 162.3711, GNorm = 0.1910, lr_0 = 3.7840e-04
Loss = 6.6741e-03, PNorm = 162.3903, GNorm = 0.4329, lr_0 = 3.7814e-04
Loss = 9.8294e-03, PNorm = 162.4107, GNorm = 0.7461, lr_0 = 3.7788e-04
Loss = 9.5974e-03, PNorm = 162.4299, GNorm = 0.2528, lr_0 = 3.7762e-04
Loss = 9.6864e-03, PNorm = 162.4533, GNorm = 0.2253, lr_0 = 3.7736e-04
Loss = 8.0759e-03, PNorm = 162.4792, GNorm = 0.2459, lr_0 = 3.7710e-04
Loss = 8.6972e-03, PNorm = 162.5012, GNorm = 0.2926, lr_0 = 3.7684e-04
Loss = 8.1757e-03, PNorm = 162.5193, GNorm = 0.3341, lr_0 = 3.7659e-04
Loss = 8.5386e-03, PNorm = 162.5390, GNorm = 0.3031, lr_0 = 3.7633e-04
Loss = 1.0065e-02, PNorm = 162.5546, GNorm = 0.4498, lr_0 = 3.7607e-04
Loss = 9.6058e-03, PNorm = 162.5748, GNorm = 0.1456, lr_0 = 3.7581e-04
Loss = 7.6299e-03, PNorm = 162.5917, GNorm = 0.2599, lr_0 = 3.7555e-04
Loss = 6.8216e-03, PNorm = 162.6074, GNorm = 0.4886, lr_0 = 3.7530e-04
Loss = 7.4435e-03, PNorm = 162.6251, GNorm = 0.1466, lr_0 = 3.7504e-04
Loss = 1.0320e-02, PNorm = 162.6447, GNorm = 0.1621, lr_0 = 3.7478e-04
Loss = 9.5703e-03, PNorm = 162.6678, GNorm = 0.2911, lr_0 = 3.7453e-04
Loss = 1.0693e-02, PNorm = 162.6841, GNorm = 0.4504, lr_0 = 3.7427e-04
Loss = 9.5310e-03, PNorm = 162.7061, GNorm = 0.3677, lr_0 = 3.7401e-04
Loss = 9.1583e-03, PNorm = 162.7266, GNorm = 0.1852, lr_0 = 3.7376e-04
Loss = 7.9041e-03, PNorm = 162.7489, GNorm = 0.3323, lr_0 = 3.7350e-04
Loss = 8.8954e-03, PNorm = 162.7695, GNorm = 0.3712, lr_0 = 3.7325e-04
Loss = 1.1110e-02, PNorm = 162.7885, GNorm = 0.2191, lr_0 = 3.7299e-04
Loss = 9.1975e-03, PNorm = 162.8102, GNorm = 0.1472, lr_0 = 3.7273e-04
Validation mae = 0.280163
Epoch 14
Loss = 7.4488e-03, PNorm = 162.8286, GNorm = 0.4316, lr_0 = 3.7248e-04
Loss = 8.0456e-03, PNorm = 162.8452, GNorm = 0.0952, lr_0 = 3.7222e-04
Loss = 7.9234e-03, PNorm = 162.8558, GNorm = 0.2124, lr_0 = 3.7197e-04
Loss = 8.8919e-03, PNorm = 162.8688, GNorm = 0.1245, lr_0 = 3.7171e-04
Loss = 8.2505e-03, PNorm = 162.8848, GNorm = 0.5660, lr_0 = 3.7146e-04
Loss = 6.8073e-03, PNorm = 162.9005, GNorm = 0.4053, lr_0 = 3.7120e-04
Loss = 9.6698e-03, PNorm = 162.9169, GNorm = 0.3255, lr_0 = 3.7095e-04
Loss = 8.1927e-03, PNorm = 162.9262, GNorm = 0.1868, lr_0 = 3.7070e-04
Loss = 7.0524e-03, PNorm = 162.9371, GNorm = 0.1266, lr_0 = 3.7044e-04
Loss = 8.8641e-03, PNorm = 162.9507, GNorm = 0.2990, lr_0 = 3.7019e-04
Loss = 6.6768e-03, PNorm = 162.9649, GNorm = 0.1465, lr_0 = 3.6993e-04
Loss = 7.4701e-03, PNorm = 162.9749, GNorm = 0.3972, lr_0 = 3.6968e-04
Loss = 7.0980e-03, PNorm = 162.9847, GNorm = 0.1792, lr_0 = 3.6943e-04
Loss = 7.1408e-03, PNorm = 162.9971, GNorm = 0.4829, lr_0 = 3.6917e-04
Loss = 5.9376e-03, PNorm = 163.0140, GNorm = 0.2523, lr_0 = 3.6892e-04
Loss = 6.8482e-03, PNorm = 163.0258, GNorm = 0.2595, lr_0 = 3.6867e-04
Loss = 7.4537e-03, PNorm = 163.0386, GNorm = 0.2034, lr_0 = 3.6842e-04
Loss = 7.1890e-03, PNorm = 163.0483, GNorm = 0.4520, lr_0 = 3.6816e-04
Loss = 6.2024e-03, PNorm = 163.0629, GNorm = 0.2759, lr_0 = 3.6791e-04
Loss = 6.3580e-03, PNorm = 163.0752, GNorm = 0.3100, lr_0 = 3.6766e-04
Loss = 6.3664e-03, PNorm = 163.0896, GNorm = 0.1538, lr_0 = 3.6741e-04
Loss = 7.1817e-03, PNorm = 163.1073, GNorm = 0.1289, lr_0 = 3.6716e-04
Loss = 6.2899e-03, PNorm = 163.1215, GNorm = 0.1959, lr_0 = 3.6690e-04
Loss = 8.2011e-03, PNorm = 163.1354, GNorm = 0.2287, lr_0 = 3.6665e-04
Loss = 6.8241e-03, PNorm = 163.1474, GNorm = 0.0809, lr_0 = 3.6640e-04
Loss = 6.5755e-03, PNorm = 163.1635, GNorm = 0.1245, lr_0 = 3.6615e-04
Loss = 7.8758e-03, PNorm = 163.1756, GNorm = 0.2341, lr_0 = 3.6590e-04
Loss = 6.6946e-03, PNorm = 163.1885, GNorm = 0.2544, lr_0 = 3.6565e-04
Loss = 7.0672e-03, PNorm = 163.2022, GNorm = 0.2409, lr_0 = 3.6540e-04
Loss = 7.6168e-03, PNorm = 163.2208, GNorm = 0.1600, lr_0 = 3.6515e-04
Loss = 6.9724e-03, PNorm = 163.2378, GNorm = 0.2692, lr_0 = 3.6490e-04
Loss = 6.9101e-03, PNorm = 163.2515, GNorm = 0.1072, lr_0 = 3.6465e-04
Loss = 7.1264e-03, PNorm = 163.2629, GNorm = 0.1605, lr_0 = 3.6440e-04
Loss = 7.1038e-03, PNorm = 163.2728, GNorm = 0.1105, lr_0 = 3.6415e-04
Loss = 9.8690e-03, PNorm = 163.2853, GNorm = 0.4695, lr_0 = 3.6390e-04
Loss = 9.0329e-03, PNorm = 163.3025, GNorm = 0.1880, lr_0 = 3.6365e-04
Loss = 6.8841e-03, PNorm = 163.3183, GNorm = 0.2262, lr_0 = 3.6340e-04
Loss = 7.5160e-03, PNorm = 163.3327, GNorm = 0.1705, lr_0 = 3.6315e-04
Loss = 8.1184e-03, PNorm = 163.3457, GNorm = 0.2610, lr_0 = 3.6290e-04
Loss = 7.5683e-03, PNorm = 163.3602, GNorm = 0.3216, lr_0 = 3.6266e-04
Loss = 9.1475e-03, PNorm = 163.3776, GNorm = 0.1868, lr_0 = 3.6241e-04
Loss = 5.9501e-03, PNorm = 163.3916, GNorm = 0.4516, lr_0 = 3.6216e-04
Loss = 6.7414e-03, PNorm = 163.4069, GNorm = 0.2439, lr_0 = 3.6191e-04
Loss = 7.3962e-03, PNorm = 163.4210, GNorm = 0.3185, lr_0 = 3.6166e-04
Loss = 7.7855e-03, PNorm = 163.4362, GNorm = 0.1999, lr_0 = 3.6141e-04
Loss = 6.6120e-03, PNorm = 163.4494, GNorm = 0.3436, lr_0 = 3.6117e-04
Loss = 6.7837e-03, PNorm = 163.4666, GNorm = 0.2929, lr_0 = 3.6092e-04
Loss = 5.7886e-03, PNorm = 163.4800, GNorm = 0.1648, lr_0 = 3.6067e-04
Loss = 6.6244e-03, PNorm = 163.4986, GNorm = 0.3313, lr_0 = 3.6043e-04
Loss = 7.0507e-03, PNorm = 163.5129, GNorm = 0.2028, lr_0 = 3.6018e-04
Loss = 8.0478e-03, PNorm = 163.5277, GNorm = 0.3952, lr_0 = 3.5993e-04
Loss = 6.7922e-03, PNorm = 163.5442, GNorm = 0.1807, lr_0 = 3.5969e-04
Loss = 8.2477e-03, PNorm = 163.5611, GNorm = 0.4178, lr_0 = 3.5944e-04
Loss = 7.3375e-03, PNorm = 163.5736, GNorm = 0.3544, lr_0 = 3.5919e-04
Loss = 6.2354e-03, PNorm = 163.5892, GNorm = 0.0887, lr_0 = 3.5895e-04
Loss = 6.7033e-03, PNorm = 163.6017, GNorm = 0.1675, lr_0 = 3.5870e-04
Loss = 6.6903e-03, PNorm = 163.6153, GNorm = 0.3453, lr_0 = 3.5845e-04
Loss = 6.3088e-03, PNorm = 163.6261, GNorm = 0.2904, lr_0 = 3.5821e-04
Loss = 5.9315e-03, PNorm = 163.6407, GNorm = 0.1615, lr_0 = 3.5796e-04
Loss = 6.5245e-03, PNorm = 163.6528, GNorm = 0.4219, lr_0 = 3.5772e-04
Loss = 6.6871e-03, PNorm = 163.6673, GNorm = 0.2257, lr_0 = 3.5747e-04
Loss = 6.4067e-03, PNorm = 163.6814, GNorm = 0.4061, lr_0 = 3.5723e-04
Loss = 7.2163e-03, PNorm = 163.6973, GNorm = 0.2435, lr_0 = 3.5698e-04
Loss = 6.8255e-03, PNorm = 163.7123, GNorm = 0.2463, lr_0 = 3.5674e-04
Loss = 6.4930e-03, PNorm = 163.7293, GNorm = 0.4429, lr_0 = 3.5650e-04
Loss = 8.1596e-03, PNorm = 163.7410, GNorm = 0.3811, lr_0 = 3.5625e-04
Loss = 5.9623e-03, PNorm = 163.7589, GNorm = 0.1588, lr_0 = 3.5601e-04
Loss = 7.3143e-03, PNorm = 163.7753, GNorm = 0.4472, lr_0 = 3.5576e-04
Loss = 6.8963e-03, PNorm = 163.7904, GNorm = 0.1710, lr_0 = 3.5552e-04
Loss = 6.0544e-03, PNorm = 163.8051, GNorm = 0.2313, lr_0 = 3.5528e-04
Loss = 6.6003e-03, PNorm = 163.8190, GNorm = 0.1202, lr_0 = 3.5503e-04
Loss = 6.2437e-03, PNorm = 163.8370, GNorm = 0.1449, lr_0 = 3.5479e-04
Loss = 7.0905e-03, PNorm = 163.8515, GNorm = 0.1335, lr_0 = 3.5455e-04
Loss = 8.1743e-03, PNorm = 163.8676, GNorm = 0.2477, lr_0 = 3.5430e-04
Loss = 6.3775e-03, PNorm = 163.8810, GNorm = 0.3461, lr_0 = 3.5406e-04
Loss = 7.1032e-03, PNorm = 163.8950, GNorm = 0.1532, lr_0 = 3.5382e-04
Loss = 6.2785e-03, PNorm = 163.9023, GNorm = 0.2347, lr_0 = 3.5358e-04
Loss = 6.4873e-03, PNorm = 163.9156, GNorm = 0.2786, lr_0 = 3.5333e-04
Loss = 8.1761e-03, PNorm = 163.9303, GNorm = 0.3979, lr_0 = 3.5309e-04
Loss = 5.5937e-03, PNorm = 163.9483, GNorm = 0.1266, lr_0 = 3.5285e-04
Loss = 6.6380e-03, PNorm = 163.9689, GNorm = 0.4728, lr_0 = 3.5261e-04
Loss = 6.1935e-03, PNorm = 163.9887, GNorm = 0.1125, lr_0 = 3.5237e-04
Loss = 9.0722e-03, PNorm = 164.0058, GNorm = 0.2847, lr_0 = 3.5212e-04
Loss = 6.4755e-03, PNorm = 164.0191, GNorm = 0.1508, lr_0 = 3.5188e-04
Loss = 8.6336e-03, PNorm = 164.0320, GNorm = 0.2820, lr_0 = 3.5164e-04
Loss = 8.1383e-03, PNorm = 164.0455, GNorm = 0.3113, lr_0 = 3.5140e-04
Loss = 6.3056e-03, PNorm = 164.0651, GNorm = 0.2892, lr_0 = 3.5116e-04
Loss = 6.1481e-03, PNorm = 164.0795, GNorm = 0.4422, lr_0 = 3.5092e-04
Loss = 6.1850e-03, PNorm = 164.0936, GNorm = 0.2186, lr_0 = 3.5068e-04
Loss = 6.4615e-03, PNorm = 164.1103, GNorm = 0.3848, lr_0 = 3.5044e-04
Loss = 7.2296e-03, PNorm = 164.1264, GNorm = 0.1813, lr_0 = 3.5020e-04
Loss = 7.6800e-03, PNorm = 164.1406, GNorm = 0.1467, lr_0 = 3.4996e-04
Loss = 7.1913e-03, PNorm = 164.1568, GNorm = 0.3031, lr_0 = 3.4972e-04
Loss = 7.7971e-03, PNorm = 164.1730, GNorm = 0.1433, lr_0 = 3.4948e-04
Loss = 5.3572e-03, PNorm = 164.1947, GNorm = 0.1047, lr_0 = 3.4924e-04
Loss = 8.0543e-03, PNorm = 164.2093, GNorm = 0.1498, lr_0 = 3.4900e-04
Loss = 7.1996e-03, PNorm = 164.2232, GNorm = 0.2964, lr_0 = 3.4876e-04
Loss = 6.5936e-03, PNorm = 164.2409, GNorm = 0.1798, lr_0 = 3.4852e-04
Loss = 6.3888e-03, PNorm = 164.2616, GNorm = 0.1893, lr_0 = 3.4828e-04
Loss = 6.7072e-03, PNorm = 164.2777, GNorm = 0.3538, lr_0 = 3.4805e-04
Loss = 6.6652e-03, PNorm = 164.2911, GNorm = 0.2533, lr_0 = 3.4781e-04
Loss = 8.3228e-03, PNorm = 164.3081, GNorm = 0.5175, lr_0 = 3.4757e-04
Loss = 6.9987e-03, PNorm = 164.3274, GNorm = 0.2615, lr_0 = 3.4733e-04
Loss = 9.0178e-03, PNorm = 164.3475, GNorm = 0.1425, lr_0 = 3.4709e-04
Loss = 7.2421e-03, PNorm = 164.3644, GNorm = 0.2801, lr_0 = 3.4686e-04
Loss = 6.7999e-03, PNorm = 164.3806, GNorm = 0.3003, lr_0 = 3.4662e-04
Loss = 5.9196e-03, PNorm = 164.3939, GNorm = 0.0836, lr_0 = 3.4638e-04
Loss = 7.1014e-03, PNorm = 164.4034, GNorm = 0.0801, lr_0 = 3.4614e-04
Loss = 7.2549e-03, PNorm = 164.4188, GNorm = 0.1813, lr_0 = 3.4591e-04
Loss = 6.5394e-03, PNorm = 164.4357, GNorm = 0.1800, lr_0 = 3.4567e-04
Loss = 6.5620e-03, PNorm = 164.4529, GNorm = 0.1512, lr_0 = 3.4543e-04
Loss = 7.2335e-03, PNorm = 164.4701, GNorm = 0.2238, lr_0 = 3.4520e-04
Loss = 8.7379e-03, PNorm = 164.4882, GNorm = 0.1599, lr_0 = 3.4496e-04
Loss = 8.5025e-03, PNorm = 164.5053, GNorm = 0.1560, lr_0 = 3.4472e-04
Loss = 6.3258e-03, PNorm = 164.5235, GNorm = 0.0882, lr_0 = 3.4449e-04
Loss = 7.9080e-03, PNorm = 164.5381, GNorm = 1.0282, lr_0 = 3.4425e-04
Loss = 8.3692e-03, PNorm = 164.5559, GNorm = 0.1271, lr_0 = 3.4402e-04
Loss = 1.0656e-02, PNorm = 164.5713, GNorm = 0.3705, lr_0 = 3.4378e-04
Loss = 7.6027e-03, PNorm = 164.5870, GNorm = 0.3788, lr_0 = 3.4354e-04
Loss = 7.4775e-03, PNorm = 164.6044, GNorm = 0.1110, lr_0 = 3.4331e-04
Validation mae = 0.279064
Epoch 15
Loss = 5.8148e-03, PNorm = 164.6187, GNorm = 0.1123, lr_0 = 3.4307e-04
Loss = 5.4352e-03, PNorm = 164.6296, GNorm = 0.3038, lr_0 = 3.4284e-04
Loss = 5.5747e-03, PNorm = 164.6390, GNorm = 0.1392, lr_0 = 3.4260e-04
Loss = 7.3146e-03, PNorm = 164.6471, GNorm = 0.2067, lr_0 = 3.4237e-04
Loss = 6.3621e-03, PNorm = 164.6579, GNorm = 0.3264, lr_0 = 3.4213e-04
Loss = 6.3296e-03, PNorm = 164.6713, GNorm = 0.3001, lr_0 = 3.4190e-04
Loss = 4.9847e-03, PNorm = 164.6814, GNorm = 0.2993, lr_0 = 3.4167e-04
Loss = 6.0427e-03, PNorm = 164.6909, GNorm = 0.1109, lr_0 = 3.4143e-04
Loss = 5.7922e-03, PNorm = 164.7026, GNorm = 0.4225, lr_0 = 3.4120e-04
Loss = 7.0263e-03, PNorm = 164.7104, GNorm = 0.5131, lr_0 = 3.4096e-04
Loss = 6.6083e-03, PNorm = 164.7250, GNorm = 0.3260, lr_0 = 3.4073e-04
Loss = 6.8568e-03, PNorm = 164.7377, GNorm = 0.1119, lr_0 = 3.4050e-04
Loss = 6.7192e-03, PNorm = 164.7512, GNorm = 0.1403, lr_0 = 3.4026e-04
Loss = 7.4712e-03, PNorm = 164.7600, GNorm = 0.1333, lr_0 = 3.4003e-04
Loss = 6.8974e-03, PNorm = 164.7688, GNorm = 0.1198, lr_0 = 3.3980e-04
Loss = 6.9214e-03, PNorm = 164.7804, GNorm = 0.3287, lr_0 = 3.3956e-04
Loss = 5.9940e-03, PNorm = 164.7904, GNorm = 0.2152, lr_0 = 3.3933e-04
Loss = 9.2779e-03, PNorm = 164.8022, GNorm = 0.1560, lr_0 = 3.3910e-04
Loss = 5.8777e-03, PNorm = 164.8144, GNorm = 0.2540, lr_0 = 3.3887e-04
Loss = 6.2859e-03, PNorm = 164.8253, GNorm = 0.1715, lr_0 = 3.3864e-04
Loss = 6.7524e-03, PNorm = 164.8373, GNorm = 0.4663, lr_0 = 3.3840e-04
Loss = 5.6964e-03, PNorm = 164.8478, GNorm = 0.3210, lr_0 = 3.3817e-04
Loss = 5.2871e-03, PNorm = 164.8633, GNorm = 0.3843, lr_0 = 3.3794e-04
Loss = 5.9929e-03, PNorm = 164.8739, GNorm = 0.3960, lr_0 = 3.3771e-04
Loss = 6.1124e-03, PNorm = 164.8823, GNorm = 0.2505, lr_0 = 3.3748e-04
Loss = 7.5871e-03, PNorm = 164.8923, GNorm = 0.1059, lr_0 = 3.3725e-04
Loss = 7.0036e-03, PNorm = 164.9037, GNorm = 0.2017, lr_0 = 3.3701e-04
Loss = 6.0125e-03, PNorm = 164.9143, GNorm = 0.3101, lr_0 = 3.3678e-04
Loss = 4.8488e-03, PNorm = 164.9292, GNorm = 0.1669, lr_0 = 3.3655e-04
Loss = 5.0617e-03, PNorm = 164.9404, GNorm = 0.2185, lr_0 = 3.3632e-04
Loss = 5.9914e-03, PNorm = 164.9535, GNorm = 0.2058, lr_0 = 3.3609e-04
Loss = 5.0152e-03, PNorm = 164.9650, GNorm = 0.2577, lr_0 = 3.3586e-04
Loss = 5.2438e-03, PNorm = 164.9743, GNorm = 0.2799, lr_0 = 3.3563e-04
Loss = 5.5212e-03, PNorm = 164.9843, GNorm = 0.4730, lr_0 = 3.3540e-04
Loss = 5.3327e-03, PNorm = 164.9960, GNorm = 0.1822, lr_0 = 3.3517e-04
Loss = 5.6493e-03, PNorm = 165.0086, GNorm = 0.2872, lr_0 = 3.3494e-04
Loss = 5.6058e-03, PNorm = 165.0207, GNorm = 0.3762, lr_0 = 3.3471e-04
Loss = 4.8679e-03, PNorm = 165.0335, GNorm = 0.1993, lr_0 = 3.3448e-04
Loss = 7.7821e-03, PNorm = 165.0453, GNorm = 0.2016, lr_0 = 3.3425e-04
Loss = 6.3779e-03, PNorm = 165.0580, GNorm = 0.3740, lr_0 = 3.3403e-04
Loss = 6.0809e-03, PNorm = 165.0726, GNorm = 0.1860, lr_0 = 3.3380e-04
Loss = 7.3738e-03, PNorm = 165.0827, GNorm = 0.0963, lr_0 = 3.3357e-04
Loss = 6.6118e-03, PNorm = 165.0932, GNorm = 0.2994, lr_0 = 3.3334e-04
Loss = 6.6973e-03, PNorm = 165.1053, GNorm = 0.1586, lr_0 = 3.3311e-04
Loss = 6.6314e-03, PNorm = 165.1215, GNorm = 0.0950, lr_0 = 3.3288e-04
Loss = 5.5423e-03, PNorm = 165.1356, GNorm = 0.1200, lr_0 = 3.3265e-04
Loss = 6.7932e-03, PNorm = 165.1503, GNorm = 0.3621, lr_0 = 3.3243e-04
Loss = 7.6348e-03, PNorm = 165.1628, GNorm = 0.1692, lr_0 = 3.3220e-04
Loss = 5.9589e-03, PNorm = 165.1747, GNorm = 0.1435, lr_0 = 3.3197e-04
Loss = 8.4778e-03, PNorm = 165.1882, GNorm = 0.1325, lr_0 = 3.3174e-04
Loss = 5.8474e-03, PNorm = 165.2015, GNorm = 0.1385, lr_0 = 3.3152e-04
Loss = 6.0170e-03, PNorm = 165.2146, GNorm = 0.2516, lr_0 = 3.3129e-04
Loss = 4.9234e-03, PNorm = 165.2281, GNorm = 0.2240, lr_0 = 3.3106e-04
Loss = 6.4886e-03, PNorm = 165.2419, GNorm = 0.3021, lr_0 = 3.3084e-04
Loss = 8.5575e-03, PNorm = 165.2543, GNorm = 0.0774, lr_0 = 3.3061e-04
Loss = 6.6221e-03, PNorm = 165.2668, GNorm = 0.1471, lr_0 = 3.3038e-04
Loss = 5.1755e-03, PNorm = 165.2798, GNorm = 0.1654, lr_0 = 3.3016e-04
Loss = 5.8320e-03, PNorm = 165.2925, GNorm = 0.1691, lr_0 = 3.2993e-04
Loss = 5.1735e-03, PNorm = 165.3049, GNorm = 0.1584, lr_0 = 3.2970e-04
Loss = 4.9614e-03, PNorm = 165.3187, GNorm = 0.0739, lr_0 = 3.2948e-04
Loss = 6.3313e-03, PNorm = 165.3312, GNorm = 0.1340, lr_0 = 3.2925e-04
Loss = 5.5837e-03, PNorm = 165.3466, GNorm = 0.3505, lr_0 = 3.2903e-04
Loss = 6.1924e-03, PNorm = 165.3583, GNorm = 0.2202, lr_0 = 3.2880e-04
Loss = 6.5417e-03, PNorm = 165.3706, GNorm = 0.2507, lr_0 = 3.2858e-04
Loss = 6.6748e-03, PNorm = 165.3863, GNorm = 0.1398, lr_0 = 3.2835e-04
Loss = 5.7547e-03, PNorm = 165.3982, GNorm = 0.3193, lr_0 = 3.2813e-04
Loss = 4.8945e-03, PNorm = 165.4118, GNorm = 0.1149, lr_0 = 3.2790e-04
Loss = 6.7302e-03, PNorm = 165.4251, GNorm = 0.2432, lr_0 = 3.2768e-04
Loss = 7.1995e-03, PNorm = 165.4391, GNorm = 0.1990, lr_0 = 3.2745e-04
Loss = 5.7926e-03, PNorm = 165.4559, GNorm = 0.3642, lr_0 = 3.2723e-04
Loss = 6.6302e-03, PNorm = 165.4726, GNorm = 0.2576, lr_0 = 3.2700e-04
Loss = 8.1666e-03, PNorm = 165.4829, GNorm = 0.2487, lr_0 = 3.2678e-04
Loss = 6.1941e-03, PNorm = 165.4947, GNorm = 0.6270, lr_0 = 3.2656e-04
Loss = 6.3559e-03, PNorm = 165.5079, GNorm = 0.2848, lr_0 = 3.2633e-04
Loss = 8.0361e-03, PNorm = 165.5202, GNorm = 0.1871, lr_0 = 3.2611e-04
Loss = 5.3272e-03, PNorm = 165.5309, GNorm = 0.1840, lr_0 = 3.2589e-04
Loss = 6.0213e-03, PNorm = 165.5436, GNorm = 0.5510, lr_0 = 3.2566e-04
Loss = 7.4056e-03, PNorm = 165.5557, GNorm = 0.2447, lr_0 = 3.2544e-04
Loss = 4.5423e-03, PNorm = 165.5664, GNorm = 0.1994, lr_0 = 3.2522e-04
Loss = 7.3143e-03, PNorm = 165.5767, GNorm = 0.3004, lr_0 = 3.2499e-04
Loss = 4.9443e-03, PNorm = 165.5887, GNorm = 0.3682, lr_0 = 3.2477e-04
Loss = 7.2492e-03, PNorm = 165.6042, GNorm = 0.0818, lr_0 = 3.2455e-04
Loss = 6.8850e-03, PNorm = 165.6159, GNorm = 0.3489, lr_0 = 3.2433e-04
Loss = 5.7016e-03, PNorm = 165.6264, GNorm = 0.3292, lr_0 = 3.2410e-04
Loss = 5.8782e-03, PNorm = 165.6395, GNorm = 0.1319, lr_0 = 3.2388e-04
Loss = 4.6832e-03, PNorm = 165.6540, GNorm = 0.2954, lr_0 = 3.2366e-04
Loss = 4.8488e-03, PNorm = 165.6671, GNorm = 0.1073, lr_0 = 3.2344e-04
Loss = 5.9502e-03, PNorm = 165.6787, GNorm = 0.4326, lr_0 = 3.2322e-04
Loss = 6.2678e-03, PNorm = 165.6928, GNorm = 0.3656, lr_0 = 3.2300e-04
Loss = 6.1453e-03, PNorm = 165.7070, GNorm = 0.2493, lr_0 = 3.2277e-04
Loss = 6.3775e-03, PNorm = 165.7209, GNorm = 0.3068, lr_0 = 3.2255e-04
Loss = 4.8360e-03, PNorm = 165.7331, GNorm = 0.1985, lr_0 = 3.2233e-04
Loss = 5.0541e-03, PNorm = 165.7454, GNorm = 0.0878, lr_0 = 3.2211e-04
Loss = 5.8679e-03, PNorm = 165.7583, GNorm = 0.1126, lr_0 = 3.2189e-04
Loss = 5.7048e-03, PNorm = 165.7722, GNorm = 0.2905, lr_0 = 3.2167e-04
Loss = 6.7373e-03, PNorm = 165.7875, GNorm = 0.0946, lr_0 = 3.2145e-04
Loss = 6.3291e-03, PNorm = 165.8007, GNorm = 0.1284, lr_0 = 3.2123e-04
Loss = 6.8504e-03, PNorm = 165.8142, GNorm = 0.2316, lr_0 = 3.2101e-04
Loss = 5.3382e-03, PNorm = 165.8303, GNorm = 0.2083, lr_0 = 3.2079e-04
Loss = 7.7146e-03, PNorm = 165.8460, GNorm = 0.1169, lr_0 = 3.2057e-04
Loss = 5.9298e-03, PNorm = 165.8595, GNorm = 0.2263, lr_0 = 3.2035e-04
Loss = 6.0864e-03, PNorm = 165.8694, GNorm = 0.1923, lr_0 = 3.2013e-04
Loss = 1.2899e-02, PNorm = 165.8852, GNorm = 0.2531, lr_0 = 3.1991e-04
Loss = 4.9443e-03, PNorm = 165.8976, GNorm = 0.1253, lr_0 = 3.1969e-04
Loss = 7.3076e-03, PNorm = 165.9109, GNorm = 0.4825, lr_0 = 3.1947e-04
Loss = 4.5118e-03, PNorm = 165.9234, GNorm = 0.3128, lr_0 = 3.1925e-04
Loss = 6.9898e-03, PNorm = 165.9400, GNorm = 0.1392, lr_0 = 3.1904e-04
Loss = 5.9180e-03, PNorm = 165.9553, GNorm = 0.0653, lr_0 = 3.1882e-04
Loss = 6.6897e-03, PNorm = 165.9676, GNorm = 0.0904, lr_0 = 3.1860e-04
Loss = 7.1679e-03, PNorm = 165.9806, GNorm = 0.2950, lr_0 = 3.1838e-04
Loss = 5.6930e-03, PNorm = 165.9940, GNorm = 0.1264, lr_0 = 3.1816e-04
Loss = 5.4595e-03, PNorm = 166.0029, GNorm = 0.2130, lr_0 = 3.1794e-04
Loss = 5.7408e-03, PNorm = 166.0160, GNorm = 0.3676, lr_0 = 3.1773e-04
Loss = 8.6244e-03, PNorm = 166.0320, GNorm = 0.9166, lr_0 = 3.1751e-04
Loss = 5.7372e-03, PNorm = 166.0439, GNorm = 0.0953, lr_0 = 3.1729e-04
Loss = 6.0286e-03, PNorm = 166.0588, GNorm = 0.1374, lr_0 = 3.1707e-04
Loss = 4.2471e-03, PNorm = 166.0731, GNorm = 0.1952, lr_0 = 3.1686e-04
Loss = 6.0080e-03, PNorm = 166.0878, GNorm = 0.3495, lr_0 = 3.1664e-04
Loss = 5.5479e-03, PNorm = 166.0987, GNorm = 0.2760, lr_0 = 3.1642e-04
Loss = 6.9882e-03, PNorm = 166.1074, GNorm = 0.1540, lr_0 = 3.1621e-04
Validation mae = 0.279595
Epoch 16
Loss = 4.3771e-03, PNorm = 166.1176, GNorm = 0.1711, lr_0 = 3.1599e-04
Loss = 4.8856e-03, PNorm = 166.1256, GNorm = 0.2973, lr_0 = 3.1577e-04
Loss = 5.3592e-03, PNorm = 166.1330, GNorm = 0.5602, lr_0 = 3.1556e-04
Loss = 5.2163e-03, PNorm = 166.1428, GNorm = 0.1515, lr_0 = 3.1534e-04
Loss = 6.3159e-03, PNorm = 166.1549, GNorm = 0.2816, lr_0 = 3.1512e-04
Loss = 3.8173e-03, PNorm = 166.1646, GNorm = 0.1837, lr_0 = 3.1491e-04
Loss = 6.2758e-03, PNorm = 166.1736, GNorm = 0.2767, lr_0 = 3.1469e-04
Loss = 4.8725e-03, PNorm = 166.1802, GNorm = 0.3425, lr_0 = 3.1448e-04
Loss = 4.8078e-03, PNorm = 166.1853, GNorm = 0.2267, lr_0 = 3.1426e-04
Loss = 5.3458e-03, PNorm = 166.1905, GNorm = 0.4638, lr_0 = 3.1405e-04
Loss = 4.3405e-03, PNorm = 166.1992, GNorm = 0.3070, lr_0 = 3.1383e-04
Loss = 4.7396e-03, PNorm = 166.2104, GNorm = 0.1407, lr_0 = 3.1362e-04
Loss = 4.8851e-03, PNorm = 166.2226, GNorm = 0.0976, lr_0 = 3.1340e-04
Loss = 5.8241e-03, PNorm = 166.2351, GNorm = 0.2080, lr_0 = 3.1319e-04
Loss = 6.8642e-03, PNorm = 166.2438, GNorm = 0.8396, lr_0 = 3.1297e-04
Loss = 5.8093e-03, PNorm = 166.2529, GNorm = 0.6576, lr_0 = 3.1276e-04
Loss = 4.8811e-03, PNorm = 166.2663, GNorm = 0.0950, lr_0 = 3.1254e-04
Loss = 4.3653e-03, PNorm = 166.2769, GNorm = 0.1991, lr_0 = 3.1233e-04
Loss = 6.0700e-03, PNorm = 166.2864, GNorm = 0.1570, lr_0 = 3.1212e-04
Loss = 4.7999e-03, PNorm = 166.2942, GNorm = 0.1931, lr_0 = 3.1190e-04
Loss = 5.1641e-03, PNorm = 166.3027, GNorm = 0.1690, lr_0 = 3.1169e-04
Loss = 5.2885e-03, PNorm = 166.3112, GNorm = 0.3198, lr_0 = 3.1147e-04
Loss = 4.6538e-03, PNorm = 166.3189, GNorm = 0.0842, lr_0 = 3.1126e-04
Loss = 4.9749e-03, PNorm = 166.3278, GNorm = 0.1616, lr_0 = 3.1105e-04
Loss = 5.0507e-03, PNorm = 166.3385, GNorm = 0.4158, lr_0 = 3.1083e-04
Loss = 6.1257e-03, PNorm = 166.3479, GNorm = 0.3592, lr_0 = 3.1062e-04
Loss = 4.7037e-03, PNorm = 166.3602, GNorm = 0.1918, lr_0 = 3.1041e-04
Loss = 5.2612e-03, PNorm = 166.3717, GNorm = 0.1933, lr_0 = 3.1020e-04
Loss = 4.6153e-03, PNorm = 166.3813, GNorm = 0.2226, lr_0 = 3.0998e-04
Loss = 5.0754e-03, PNorm = 166.3893, GNorm = 0.1250, lr_0 = 3.0977e-04
Loss = 4.8812e-03, PNorm = 166.3996, GNorm = 0.1113, lr_0 = 3.0956e-04
Loss = 5.2279e-03, PNorm = 166.4086, GNorm = 0.2271, lr_0 = 3.0935e-04
Loss = 6.6260e-03, PNorm = 166.4186, GNorm = 0.1905, lr_0 = 3.0914e-04
Loss = 4.9339e-03, PNorm = 166.4297, GNorm = 0.1480, lr_0 = 3.0892e-04
Loss = 5.7724e-03, PNorm = 166.4387, GNorm = 0.3788, lr_0 = 3.0871e-04
Loss = 3.8069e-03, PNorm = 166.4478, GNorm = 0.1943, lr_0 = 3.0850e-04
Loss = 4.2283e-03, PNorm = 166.4569, GNorm = 0.2364, lr_0 = 3.0829e-04
Loss = 4.9468e-03, PNorm = 166.4672, GNorm = 0.1291, lr_0 = 3.0808e-04
Loss = 5.4134e-03, PNorm = 166.4746, GNorm = 0.0995, lr_0 = 3.0787e-04
Loss = 6.4871e-03, PNorm = 166.4849, GNorm = 0.0880, lr_0 = 3.0766e-04
Loss = 7.2096e-03, PNorm = 166.4960, GNorm = 0.1870, lr_0 = 3.0745e-04
Loss = 5.2272e-03, PNorm = 166.5078, GNorm = 0.3006, lr_0 = 3.0723e-04
Loss = 5.9009e-03, PNorm = 166.5220, GNorm = 0.2738, lr_0 = 3.0702e-04
Loss = 6.5960e-03, PNorm = 166.5329, GNorm = 0.2805, lr_0 = 3.0681e-04
Loss = 5.0437e-03, PNorm = 166.5432, GNorm = 0.2693, lr_0 = 3.0660e-04
Loss = 5.1133e-03, PNorm = 166.5522, GNorm = 0.1751, lr_0 = 3.0639e-04
Loss = 7.2614e-03, PNorm = 166.5661, GNorm = 0.1670, lr_0 = 3.0618e-04
Loss = 4.5664e-03, PNorm = 166.5774, GNorm = 0.1017, lr_0 = 3.0597e-04
Loss = 5.4389e-03, PNorm = 166.5886, GNorm = 0.1915, lr_0 = 3.0576e-04
Loss = 4.2296e-03, PNorm = 166.5994, GNorm = 0.2469, lr_0 = 3.0555e-04
Loss = 6.2153e-03, PNorm = 166.6082, GNorm = 0.2065, lr_0 = 3.0535e-04
Loss = 5.9634e-03, PNorm = 166.6184, GNorm = 0.1431, lr_0 = 3.0514e-04
Loss = 5.4087e-03, PNorm = 166.6314, GNorm = 0.3616, lr_0 = 3.0493e-04
Loss = 4.4402e-03, PNorm = 166.6422, GNorm = 0.2100, lr_0 = 3.0472e-04
Loss = 6.2010e-03, PNorm = 166.6538, GNorm = 0.1688, lr_0 = 3.0451e-04
Loss = 5.0155e-03, PNorm = 166.6633, GNorm = 0.1665, lr_0 = 3.0430e-04
Loss = 4.5897e-03, PNorm = 166.6738, GNorm = 0.1391, lr_0 = 3.0409e-04
Loss = 4.4762e-03, PNorm = 166.6815, GNorm = 0.3796, lr_0 = 3.0388e-04
Loss = 4.2941e-03, PNorm = 166.6890, GNorm = 0.1497, lr_0 = 3.0368e-04
Loss = 4.6550e-03, PNorm = 166.6951, GNorm = 0.3051, lr_0 = 3.0347e-04
Loss = 5.7623e-03, PNorm = 166.7070, GNorm = 0.2382, lr_0 = 3.0326e-04
Loss = 4.6293e-03, PNorm = 166.7195, GNorm = 0.2695, lr_0 = 3.0305e-04
Loss = 4.7125e-03, PNorm = 166.7290, GNorm = 0.7758, lr_0 = 3.0284e-04
Loss = 5.5420e-03, PNorm = 166.7371, GNorm = 0.1249, lr_0 = 3.0264e-04
Loss = 6.8512e-03, PNorm = 166.7458, GNorm = 0.2072, lr_0 = 3.0243e-04
Loss = 5.0522e-03, PNorm = 166.7573, GNorm = 0.3607, lr_0 = 3.0222e-04
Loss = 5.1174e-03, PNorm = 166.7679, GNorm = 0.2442, lr_0 = 3.0202e-04
Loss = 6.5403e-03, PNorm = 166.7812, GNorm = 0.1453, lr_0 = 3.0181e-04
Loss = 4.7451e-03, PNorm = 166.7907, GNorm = 0.1320, lr_0 = 3.0160e-04
Loss = 4.3664e-03, PNorm = 166.8027, GNorm = 0.1442, lr_0 = 3.0140e-04
Loss = 4.8917e-03, PNorm = 166.8148, GNorm = 0.1633, lr_0 = 3.0119e-04
Loss = 4.7700e-03, PNorm = 166.8272, GNorm = 0.0792, lr_0 = 3.0098e-04
Loss = 5.7490e-03, PNorm = 166.8392, GNorm = 0.0694, lr_0 = 3.0078e-04
Loss = 5.3701e-03, PNorm = 166.8488, GNorm = 0.0860, lr_0 = 3.0057e-04
Loss = 4.4340e-03, PNorm = 166.8597, GNorm = 0.2141, lr_0 = 3.0036e-04
Loss = 4.1486e-03, PNorm = 166.8684, GNorm = 0.3264, lr_0 = 3.0016e-04
Loss = 5.1361e-03, PNorm = 166.8775, GNorm = 0.2728, lr_0 = 2.9995e-04
Loss = 4.7269e-03, PNorm = 166.8864, GNorm = 0.1982, lr_0 = 2.9975e-04
Loss = 6.1364e-03, PNorm = 166.8984, GNorm = 0.0745, lr_0 = 2.9954e-04
Loss = 3.3972e-03, PNorm = 166.9091, GNorm = 0.1169, lr_0 = 2.9934e-04
Loss = 6.5699e-03, PNorm = 166.9186, GNorm = 0.0939, lr_0 = 2.9913e-04
Loss = 7.4132e-03, PNorm = 166.9264, GNorm = 0.2493, lr_0 = 2.9893e-04
Loss = 6.0796e-03, PNorm = 166.9356, GNorm = 0.1943, lr_0 = 2.9872e-04
Loss = 6.3064e-03, PNorm = 166.9448, GNorm = 0.2087, lr_0 = 2.9852e-04
Loss = 4.7398e-03, PNorm = 166.9579, GNorm = 0.1137, lr_0 = 2.9831e-04
Loss = 4.6518e-03, PNorm = 166.9660, GNorm = 0.2114, lr_0 = 2.9811e-04
Loss = 5.1443e-03, PNorm = 166.9779, GNorm = 0.2081, lr_0 = 2.9790e-04
Loss = 6.7068e-03, PNorm = 166.9891, GNorm = 0.2420, lr_0 = 2.9770e-04
Loss = 4.4644e-03, PNorm = 167.0018, GNorm = 0.1591, lr_0 = 2.9750e-04
Loss = 5.3992e-03, PNorm = 167.0117, GNorm = 0.1606, lr_0 = 2.9729e-04
Loss = 5.5131e-03, PNorm = 167.0203, GNorm = 0.1972, lr_0 = 2.9709e-04
Loss = 6.0272e-03, PNorm = 167.0317, GNorm = 0.1130, lr_0 = 2.9689e-04
Loss = 6.1349e-03, PNorm = 167.0457, GNorm = 0.3154, lr_0 = 2.9668e-04
Loss = 4.4414e-03, PNorm = 167.0600, GNorm = 0.2078, lr_0 = 2.9648e-04
Loss = 4.5758e-03, PNorm = 167.0742, GNorm = 0.2117, lr_0 = 2.9628e-04
Loss = 4.8581e-03, PNorm = 167.0844, GNorm = 0.2544, lr_0 = 2.9607e-04
Loss = 6.7898e-03, PNorm = 167.0954, GNorm = 0.0881, lr_0 = 2.9587e-04
Loss = 5.9726e-03, PNorm = 167.1067, GNorm = 0.2789, lr_0 = 2.9567e-04
Loss = 5.7714e-03, PNorm = 167.1168, GNorm = 0.1105, lr_0 = 2.9546e-04
Loss = 4.8360e-03, PNorm = 167.1265, GNorm = 0.1944, lr_0 = 2.9526e-04
Loss = 5.2717e-03, PNorm = 167.1369, GNorm = 0.1302, lr_0 = 2.9506e-04
Loss = 8.1301e-03, PNorm = 167.1477, GNorm = 0.1763, lr_0 = 2.9486e-04
Loss = 7.2032e-03, PNorm = 167.1590, GNorm = 0.5353, lr_0 = 2.9466e-04
Loss = 4.6619e-03, PNorm = 167.1721, GNorm = 0.0923, lr_0 = 2.9445e-04
Loss = 5.1942e-03, PNorm = 167.1847, GNorm = 0.1709, lr_0 = 2.9425e-04
Loss = 4.6258e-03, PNorm = 167.1979, GNorm = 0.1610, lr_0 = 2.9405e-04
Loss = 4.5717e-03, PNorm = 167.2084, GNorm = 0.1774, lr_0 = 2.9385e-04
Loss = 4.9979e-03, PNorm = 167.2183, GNorm = 0.2232, lr_0 = 2.9365e-04
Loss = 4.2374e-03, PNorm = 167.2267, GNorm = 0.1304, lr_0 = 2.9345e-04
Loss = 3.9076e-03, PNorm = 167.2356, GNorm = 0.2879, lr_0 = 2.9325e-04
Loss = 5.6799e-03, PNorm = 167.2466, GNorm = 0.4149, lr_0 = 2.9305e-04
Loss = 6.1679e-03, PNorm = 167.2590, GNorm = 0.1756, lr_0 = 2.9284e-04
Loss = 5.9414e-03, PNorm = 167.2716, GNorm = 0.1128, lr_0 = 2.9264e-04
Loss = 5.5525e-03, PNorm = 167.2780, GNorm = 0.1755, lr_0 = 2.9244e-04
Loss = 6.4243e-03, PNorm = 167.2865, GNorm = 0.2696, lr_0 = 2.9224e-04
Loss = 7.6552e-03, PNorm = 167.2973, GNorm = 0.1210, lr_0 = 2.9204e-04
Loss = 5.4834e-03, PNorm = 167.3096, GNorm = 0.0605, lr_0 = 2.9184e-04
Loss = 5.4463e-03, PNorm = 167.3238, GNorm = 0.1954, lr_0 = 2.9164e-04
Loss = 4.9692e-03, PNorm = 167.3371, GNorm = 0.1856, lr_0 = 2.9144e-04
Loss = 4.4009e-03, PNorm = 167.3534, GNorm = 0.2650, lr_0 = 2.9124e-04
Validation mae = 0.278747
Epoch 17
Loss = 3.8943e-03, PNorm = 167.3655, GNorm = 0.2890, lr_0 = 2.9104e-04
Loss = 4.1469e-03, PNorm = 167.3732, GNorm = 0.2921, lr_0 = 2.9084e-04
Loss = 4.2414e-03, PNorm = 167.3776, GNorm = 0.3857, lr_0 = 2.9065e-04
Loss = 4.2158e-03, PNorm = 167.3850, GNorm = 0.1827, lr_0 = 2.9045e-04
Loss = 5.5633e-03, PNorm = 167.3947, GNorm = 0.1279, lr_0 = 2.9025e-04
Loss = 4.5540e-03, PNorm = 167.4009, GNorm = 0.1006, lr_0 = 2.9005e-04
Loss = 4.2874e-03, PNorm = 167.4090, GNorm = 0.2977, lr_0 = 2.8985e-04
Loss = 4.7628e-03, PNorm = 167.4172, GNorm = 0.1535, lr_0 = 2.8965e-04
Loss = 4.0108e-03, PNorm = 167.4244, GNorm = 0.1362, lr_0 = 2.8945e-04
Loss = 3.7629e-03, PNorm = 167.4345, GNorm = 0.2718, lr_0 = 2.8925e-04
Loss = 4.3984e-03, PNorm = 167.4398, GNorm = 0.2194, lr_0 = 2.8906e-04
Loss = 4.1904e-03, PNorm = 167.4494, GNorm = 0.1429, lr_0 = 2.8886e-04
Loss = 3.8450e-03, PNorm = 167.4558, GNorm = 0.2044, lr_0 = 2.8866e-04
Loss = 3.9308e-03, PNorm = 167.4656, GNorm = 0.0532, lr_0 = 2.8846e-04
Loss = 4.5002e-03, PNorm = 167.4739, GNorm = 0.1379, lr_0 = 2.8826e-04
Loss = 3.4384e-03, PNorm = 167.4803, GNorm = 0.0591, lr_0 = 2.8807e-04
Loss = 3.8484e-03, PNorm = 167.4866, GNorm = 0.1456, lr_0 = 2.8787e-04
Loss = 4.9012e-03, PNorm = 167.4966, GNorm = 0.2713, lr_0 = 2.8767e-04
Loss = 3.5886e-03, PNorm = 167.5080, GNorm = 0.2039, lr_0 = 2.8748e-04
Loss = 3.7350e-03, PNorm = 167.5189, GNorm = 0.1547, lr_0 = 2.8728e-04
Loss = 3.7679e-03, PNorm = 167.5307, GNorm = 0.1986, lr_0 = 2.8708e-04
Loss = 6.2683e-03, PNorm = 167.5352, GNorm = 0.1305, lr_0 = 2.8689e-04
Loss = 3.7441e-03, PNorm = 167.5395, GNorm = 0.0908, lr_0 = 2.8669e-04
Loss = 4.4707e-03, PNorm = 167.5464, GNorm = 0.2834, lr_0 = 2.8649e-04
Loss = 4.4371e-03, PNorm = 167.5560, GNorm = 0.3442, lr_0 = 2.8630e-04
Loss = 5.3629e-03, PNorm = 167.5621, GNorm = 0.2903, lr_0 = 2.8610e-04
Loss = 4.6773e-03, PNorm = 167.5701, GNorm = 0.2361, lr_0 = 2.8590e-04
Loss = 6.4726e-03, PNorm = 167.5794, GNorm = 0.1402, lr_0 = 2.8571e-04
Loss = 3.6489e-03, PNorm = 167.5869, GNorm = 0.1824, lr_0 = 2.8551e-04
Loss = 5.2012e-03, PNorm = 167.5953, GNorm = 0.1972, lr_0 = 2.8532e-04
Loss = 3.9425e-03, PNorm = 167.6033, GNorm = 0.1935, lr_0 = 2.8512e-04
Loss = 4.5706e-03, PNorm = 167.6107, GNorm = 0.1692, lr_0 = 2.8493e-04
Loss = 4.6358e-03, PNorm = 167.6195, GNorm = 0.0730, lr_0 = 2.8473e-04
Loss = 6.1422e-03, PNorm = 167.6270, GNorm = 0.2406, lr_0 = 2.8454e-04
Loss = 4.4828e-03, PNorm = 167.6356, GNorm = 0.1597, lr_0 = 2.8434e-04
Loss = 3.8995e-03, PNorm = 167.6451, GNorm = 0.1649, lr_0 = 2.8415e-04
Loss = 3.6951e-03, PNorm = 167.6513, GNorm = 0.1608, lr_0 = 2.8395e-04
Loss = 4.1261e-03, PNorm = 167.6576, GNorm = 0.1300, lr_0 = 2.8376e-04
Loss = 4.6366e-03, PNorm = 167.6678, GNorm = 0.0629, lr_0 = 2.8356e-04
Loss = 3.6544e-03, PNorm = 167.6747, GNorm = 0.0885, lr_0 = 2.8337e-04
Loss = 5.3717e-03, PNorm = 167.6822, GNorm = 0.1323, lr_0 = 2.8317e-04
Loss = 5.0525e-03, PNorm = 167.6882, GNorm = 0.1734, lr_0 = 2.8298e-04
Loss = 4.5421e-03, PNorm = 167.6967, GNorm = 0.2521, lr_0 = 2.8279e-04
Loss = 4.5574e-03, PNorm = 167.7072, GNorm = 0.1866, lr_0 = 2.8259e-04
Loss = 3.7879e-03, PNorm = 167.7160, GNorm = 0.0663, lr_0 = 2.8240e-04
Loss = 3.9739e-03, PNorm = 167.7250, GNorm = 0.1193, lr_0 = 2.8221e-04
Loss = 6.7176e-03, PNorm = 167.7321, GNorm = 0.1411, lr_0 = 2.8201e-04
Loss = 4.0873e-03, PNorm = 167.7416, GNorm = 0.1685, lr_0 = 2.8182e-04
Loss = 6.0454e-03, PNorm = 167.7501, GNorm = 0.1916, lr_0 = 2.8163e-04
Loss = 4.6984e-03, PNorm = 167.7593, GNorm = 0.1168, lr_0 = 2.8143e-04
Loss = 4.7211e-03, PNorm = 167.7691, GNorm = 0.1593, lr_0 = 2.8124e-04
Loss = 3.4546e-03, PNorm = 167.7781, GNorm = 0.0828, lr_0 = 2.8105e-04
Loss = 5.0462e-03, PNorm = 167.7854, GNorm = 0.1993, lr_0 = 2.8085e-04
Loss = 3.4733e-03, PNorm = 167.7919, GNorm = 0.2053, lr_0 = 2.8066e-04
Loss = 6.2588e-03, PNorm = 167.8008, GNorm = 0.1644, lr_0 = 2.8047e-04
Loss = 3.6448e-03, PNorm = 167.8101, GNorm = 0.1818, lr_0 = 2.8028e-04
Loss = 4.7637e-03, PNorm = 167.8181, GNorm = 0.1562, lr_0 = 2.8009e-04
Loss = 4.5418e-03, PNorm = 167.8269, GNorm = 0.1262, lr_0 = 2.7989e-04
Loss = 4.8414e-03, PNorm = 167.8323, GNorm = 0.3219, lr_0 = 2.7970e-04
Loss = 3.7254e-03, PNorm = 167.8388, GNorm = 0.1780, lr_0 = 2.7951e-04
Loss = 3.1497e-03, PNorm = 167.8433, GNorm = 0.3087, lr_0 = 2.7932e-04
Loss = 4.2676e-03, PNorm = 167.8522, GNorm = 0.0971, lr_0 = 2.7913e-04
Loss = 3.8993e-03, PNorm = 167.8619, GNorm = 0.2929, lr_0 = 2.7894e-04
Loss = 4.8357e-03, PNorm = 167.8726, GNorm = 0.1792, lr_0 = 2.7875e-04
Loss = 4.0936e-03, PNorm = 167.8837, GNorm = 0.2838, lr_0 = 2.7855e-04
Loss = 3.8051e-03, PNorm = 167.8938, GNorm = 0.1534, lr_0 = 2.7836e-04
Loss = 3.8914e-03, PNorm = 167.9037, GNorm = 0.1108, lr_0 = 2.7817e-04
Loss = 6.1604e-03, PNorm = 167.9136, GNorm = 0.1579, lr_0 = 2.7798e-04
Loss = 4.0518e-03, PNorm = 167.9245, GNorm = 0.4167, lr_0 = 2.7779e-04
Loss = 5.0691e-03, PNorm = 167.9358, GNorm = 0.1721, lr_0 = 2.7760e-04
Loss = 5.0964e-03, PNorm = 167.9437, GNorm = 0.2466, lr_0 = 2.7741e-04
Loss = 4.6568e-03, PNorm = 167.9512, GNorm = 0.0608, lr_0 = 2.7722e-04
Loss = 4.3289e-03, PNorm = 167.9626, GNorm = 0.2287, lr_0 = 2.7703e-04
Loss = 4.7915e-03, PNorm = 167.9708, GNorm = 0.1594, lr_0 = 2.7684e-04
Loss = 4.5030e-03, PNorm = 167.9805, GNorm = 0.2854, lr_0 = 2.7665e-04
Loss = 4.4168e-03, PNorm = 167.9909, GNorm = 0.1264, lr_0 = 2.7646e-04
Loss = 3.2491e-03, PNorm = 167.9997, GNorm = 0.1048, lr_0 = 2.7627e-04
Loss = 4.5846e-03, PNorm = 168.0098, GNorm = 0.1267, lr_0 = 2.7608e-04
Loss = 5.8276e-03, PNorm = 168.0162, GNorm = 0.4014, lr_0 = 2.7590e-04
Loss = 5.0951e-03, PNorm = 168.0250, GNorm = 0.3490, lr_0 = 2.7571e-04
Loss = 3.7699e-03, PNorm = 168.0337, GNorm = 0.2443, lr_0 = 2.7552e-04
Loss = 4.0037e-03, PNorm = 168.0458, GNorm = 0.3113, lr_0 = 2.7533e-04
Loss = 4.5133e-03, PNorm = 168.0567, GNorm = 0.2420, lr_0 = 2.7514e-04
Loss = 4.7807e-03, PNorm = 168.0636, GNorm = 0.0535, lr_0 = 2.7495e-04
Loss = 7.0350e-03, PNorm = 168.0691, GNorm = 0.1092, lr_0 = 2.7476e-04
Loss = 4.8823e-03, PNorm = 168.0787, GNorm = 0.1706, lr_0 = 2.7457e-04
Loss = 5.8567e-03, PNorm = 168.0860, GNorm = 0.1997, lr_0 = 2.7439e-04
Loss = 3.6754e-03, PNorm = 168.0955, GNorm = 0.1818, lr_0 = 2.7420e-04
Loss = 3.7967e-03, PNorm = 168.1059, GNorm = 0.2249, lr_0 = 2.7401e-04
Loss = 4.1416e-03, PNorm = 168.1154, GNorm = 0.1512, lr_0 = 2.7382e-04
Loss = 8.3466e-03, PNorm = 168.1247, GNorm = 0.0842, lr_0 = 2.7364e-04
Loss = 6.4432e-03, PNorm = 168.1324, GNorm = 0.0981, lr_0 = 2.7345e-04
Loss = 5.0062e-03, PNorm = 168.1417, GNorm = 0.2252, lr_0 = 2.7326e-04
Loss = 3.8257e-03, PNorm = 168.1519, GNorm = 0.2374, lr_0 = 2.7307e-04
Loss = 4.2667e-03, PNorm = 168.1620, GNorm = 0.1324, lr_0 = 2.7289e-04
Loss = 4.1279e-03, PNorm = 168.1705, GNorm = 0.1584, lr_0 = 2.7270e-04
Loss = 5.6937e-03, PNorm = 168.1802, GNorm = 0.2215, lr_0 = 2.7251e-04
Loss = 4.5371e-03, PNorm = 168.1885, GNorm = 0.2012, lr_0 = 2.7233e-04
Loss = 4.2705e-03, PNorm = 168.1974, GNorm = 0.1654, lr_0 = 2.7214e-04
Loss = 5.3641e-03, PNorm = 168.2056, GNorm = 0.1466, lr_0 = 2.7195e-04
Loss = 3.8373e-03, PNorm = 168.2135, GNorm = 0.2669, lr_0 = 2.7177e-04
Loss = 4.8497e-03, PNorm = 168.2204, GNorm = 0.1130, lr_0 = 2.7158e-04
Loss = 4.4560e-03, PNorm = 168.2276, GNorm = 0.2630, lr_0 = 2.7139e-04
Loss = 5.3420e-03, PNorm = 168.2396, GNorm = 0.0934, lr_0 = 2.7121e-04
Loss = 4.8532e-03, PNorm = 168.2467, GNorm = 0.2987, lr_0 = 2.7102e-04
Loss = 6.4660e-03, PNorm = 168.2560, GNorm = 0.1700, lr_0 = 2.7084e-04
Loss = 3.7113e-03, PNorm = 168.2633, GNorm = 0.1763, lr_0 = 2.7065e-04
Loss = 4.1309e-03, PNorm = 168.2719, GNorm = 0.1634, lr_0 = 2.7047e-04
Loss = 3.2269e-03, PNorm = 168.2828, GNorm = 0.1246, lr_0 = 2.7028e-04
Loss = 3.4516e-03, PNorm = 168.2945, GNorm = 0.0497, lr_0 = 2.7010e-04
Loss = 3.9516e-03, PNorm = 168.3033, GNorm = 0.1656, lr_0 = 2.6991e-04
Loss = 6.6913e-03, PNorm = 168.3158, GNorm = 0.3149, lr_0 = 2.6973e-04
Loss = 4.1242e-03, PNorm = 168.3239, GNorm = 0.1117, lr_0 = 2.6954e-04
Loss = 6.0106e-03, PNorm = 168.3341, GNorm = 0.3843, lr_0 = 2.6936e-04
Loss = 4.8744e-03, PNorm = 168.3479, GNorm = 0.1106, lr_0 = 2.6917e-04
Loss = 3.9381e-03, PNorm = 168.3607, GNorm = 0.0588, lr_0 = 2.6899e-04
Loss = 3.9331e-03, PNorm = 168.3717, GNorm = 0.3561, lr_0 = 2.6880e-04
Loss = 4.4801e-03, PNorm = 168.3795, GNorm = 0.0758, lr_0 = 2.6862e-04
Loss = 5.4210e-03, PNorm = 168.3866, GNorm = 0.3383, lr_0 = 2.6844e-04
Loss = 5.9373e-03, PNorm = 168.3961, GNorm = 0.1346, lr_0 = 2.6825e-04
Validation mae = 0.278016
Epoch 18
Loss = 3.6364e-03, PNorm = 168.4051, GNorm = 0.0571, lr_0 = 2.6807e-04
Loss = 3.5930e-03, PNorm = 168.4133, GNorm = 0.2435, lr_0 = 2.6788e-04
Loss = 3.9281e-03, PNorm = 168.4140, GNorm = 0.1160, lr_0 = 2.6770e-04
Loss = 4.0205e-03, PNorm = 168.4184, GNorm = 0.0883, lr_0 = 2.6752e-04
Loss = 3.4725e-03, PNorm = 168.4227, GNorm = 0.1661, lr_0 = 2.6733e-04
Loss = 3.3870e-03, PNorm = 168.4301, GNorm = 0.1500, lr_0 = 2.6715e-04
Loss = 3.1375e-03, PNorm = 168.4384, GNorm = 0.1912, lr_0 = 2.6697e-04
Loss = 3.9822e-03, PNorm = 168.4453, GNorm = 0.0969, lr_0 = 2.6678e-04
Loss = 3.7047e-03, PNorm = 168.4514, GNorm = 0.0932, lr_0 = 2.6660e-04
Loss = 4.1190e-03, PNorm = 168.4583, GNorm = 0.0659, lr_0 = 2.6642e-04
Loss = 3.8000e-03, PNorm = 168.4652, GNorm = 0.1932, lr_0 = 2.6624e-04
Loss = 5.6590e-03, PNorm = 168.4713, GNorm = 0.1848, lr_0 = 2.6605e-04
Loss = 3.7003e-03, PNorm = 168.4768, GNorm = 0.0750, lr_0 = 2.6587e-04
Loss = 4.6367e-03, PNorm = 168.4815, GNorm = 0.1807, lr_0 = 2.6569e-04
Loss = 3.7936e-03, PNorm = 168.4914, GNorm = 0.0543, lr_0 = 2.6551e-04
Loss = 3.0304e-03, PNorm = 168.4969, GNorm = 0.1116, lr_0 = 2.6533e-04
Loss = 4.4015e-03, PNorm = 168.5033, GNorm = 0.0804, lr_0 = 2.6514e-04
Loss = 3.5170e-03, PNorm = 168.5108, GNorm = 0.1987, lr_0 = 2.6496e-04
Loss = 3.6266e-03, PNorm = 168.5172, GNorm = 0.6337, lr_0 = 2.6478e-04
Loss = 4.8013e-03, PNorm = 168.5220, GNorm = 0.0553, lr_0 = 2.6460e-04
Loss = 5.2240e-03, PNorm = 168.5271, GNorm = 0.1796, lr_0 = 2.6442e-04
Loss = 3.1129e-03, PNorm = 168.5350, GNorm = 0.3124, lr_0 = 2.6424e-04
Loss = 3.5577e-03, PNorm = 168.5435, GNorm = 0.0684, lr_0 = 2.6406e-04
Loss = 4.0046e-03, PNorm = 168.5491, GNorm = 0.3241, lr_0 = 2.6388e-04
Loss = 3.3201e-03, PNorm = 168.5536, GNorm = 0.0837, lr_0 = 2.6369e-04
Loss = 4.0643e-03, PNorm = 168.5607, GNorm = 0.1621, lr_0 = 2.6351e-04
Loss = 4.4617e-03, PNorm = 168.5687, GNorm = 0.2677, lr_0 = 2.6333e-04
Loss = 3.7662e-03, PNorm = 168.5750, GNorm = 0.2241, lr_0 = 2.6315e-04
Loss = 3.0868e-03, PNorm = 168.5818, GNorm = 0.0881, lr_0 = 2.6297e-04
Loss = 3.6041e-03, PNorm = 168.5902, GNorm = 0.1812, lr_0 = 2.6279e-04
Loss = 5.0125e-03, PNorm = 168.5993, GNorm = 0.2216, lr_0 = 2.6261e-04
Loss = 3.7862e-03, PNorm = 168.6102, GNorm = 0.1083, lr_0 = 2.6243e-04
Loss = 5.0449e-03, PNorm = 168.6182, GNorm = 0.1301, lr_0 = 2.6225e-04
Loss = 3.8257e-03, PNorm = 168.6238, GNorm = 0.0782, lr_0 = 2.6207e-04
Loss = 3.6670e-03, PNorm = 168.6266, GNorm = 0.1449, lr_0 = 2.6189e-04
Loss = 3.2296e-03, PNorm = 168.6313, GNorm = 0.0981, lr_0 = 2.6171e-04
Loss = 2.9532e-03, PNorm = 168.6387, GNorm = 0.1202, lr_0 = 2.6153e-04
Loss = 3.3945e-03, PNorm = 168.6470, GNorm = 0.1425, lr_0 = 2.6136e-04
Loss = 4.8502e-03, PNorm = 168.6535, GNorm = 0.2174, lr_0 = 2.6118e-04
Loss = 3.9043e-03, PNorm = 168.6585, GNorm = 0.3288, lr_0 = 2.6100e-04
Loss = 3.8899e-03, PNorm = 168.6656, GNorm = 0.7824, lr_0 = 2.6082e-04
Loss = 3.4129e-03, PNorm = 168.6730, GNorm = 0.4171, lr_0 = 2.6064e-04
Loss = 5.6781e-03, PNorm = 168.6824, GNorm = 0.3005, lr_0 = 2.6046e-04
Loss = 4.3552e-03, PNorm = 168.6897, GNorm = 0.6441, lr_0 = 2.6028e-04
Loss = 3.8337e-03, PNorm = 168.6993, GNorm = 0.2408, lr_0 = 2.6011e-04
Loss = 7.1161e-03, PNorm = 168.7053, GNorm = 0.1343, lr_0 = 2.5993e-04
Loss = 3.0330e-03, PNorm = 168.7138, GNorm = 0.0522, lr_0 = 2.5975e-04
Loss = 3.3663e-03, PNorm = 168.7201, GNorm = 0.0718, lr_0 = 2.5957e-04
Loss = 4.6799e-03, PNorm = 168.7261, GNorm = 0.4037, lr_0 = 2.5939e-04
Loss = 4.1158e-03, PNorm = 168.7331, GNorm = 0.1640, lr_0 = 2.5922e-04
Loss = 3.5780e-03, PNorm = 168.7400, GNorm = 0.1999, lr_0 = 2.5904e-04
Loss = 3.3292e-03, PNorm = 168.7492, GNorm = 0.1933, lr_0 = 2.5886e-04
Loss = 4.5517e-03, PNorm = 168.7559, GNorm = 0.2010, lr_0 = 2.5868e-04
Loss = 3.0132e-03, PNorm = 168.7637, GNorm = 0.0840, lr_0 = 2.5851e-04
Loss = 4.8673e-03, PNorm = 168.7724, GNorm = 0.1448, lr_0 = 2.5833e-04
Loss = 3.4873e-03, PNorm = 168.7802, GNorm = 0.2217, lr_0 = 2.5815e-04
Loss = 3.7195e-03, PNorm = 168.7857, GNorm = 0.0673, lr_0 = 2.5797e-04
Loss = 3.1557e-03, PNorm = 168.7917, GNorm = 0.3695, lr_0 = 2.5780e-04
Loss = 3.9705e-03, PNorm = 168.7980, GNorm = 0.1839, lr_0 = 2.5762e-04
Loss = 3.5873e-03, PNorm = 168.8068, GNorm = 0.2557, lr_0 = 2.5745e-04
Loss = 3.3868e-03, PNorm = 168.8159, GNorm = 0.2663, lr_0 = 2.5727e-04
Loss = 4.3014e-03, PNorm = 168.8209, GNorm = 0.1733, lr_0 = 2.5709e-04
Loss = 3.7260e-03, PNorm = 168.8269, GNorm = 0.1002, lr_0 = 2.5692e-04
Loss = 4.8507e-03, PNorm = 168.8338, GNorm = 0.1590, lr_0 = 2.5674e-04
Loss = 3.7987e-03, PNorm = 168.8383, GNorm = 0.1484, lr_0 = 2.5656e-04
Loss = 3.5166e-03, PNorm = 168.8459, GNorm = 0.2788, lr_0 = 2.5639e-04
Loss = 4.5798e-03, PNorm = 168.8548, GNorm = 0.2015, lr_0 = 2.5621e-04
Loss = 4.9553e-03, PNorm = 168.8640, GNorm = 0.2105, lr_0 = 2.5604e-04
Loss = 3.1032e-03, PNorm = 168.8710, GNorm = 0.1316, lr_0 = 2.5586e-04
Loss = 3.4208e-03, PNorm = 168.8765, GNorm = 0.0890, lr_0 = 2.5569e-04
Loss = 5.0748e-03, PNorm = 168.8845, GNorm = 0.0752, lr_0 = 2.5551e-04
Loss = 5.0502e-03, PNorm = 168.8938, GNorm = 0.2074, lr_0 = 2.5534e-04
Loss = 5.1048e-03, PNorm = 168.9040, GNorm = 0.5133, lr_0 = 2.5516e-04
Loss = 6.4345e-03, PNorm = 168.9109, GNorm = 0.2016, lr_0 = 2.5499e-04
Loss = 3.8300e-03, PNorm = 168.9199, GNorm = 0.1533, lr_0 = 2.5481e-04
Loss = 4.1619e-03, PNorm = 168.9269, GNorm = 0.1588, lr_0 = 2.5464e-04
Loss = 4.6088e-03, PNorm = 168.9318, GNorm = 0.1498, lr_0 = 2.5446e-04
Loss = 3.7277e-03, PNorm = 168.9361, GNorm = 0.1126, lr_0 = 2.5429e-04
Loss = 3.0568e-03, PNorm = 168.9446, GNorm = 0.1179, lr_0 = 2.5411e-04
Loss = 3.3379e-03, PNorm = 168.9528, GNorm = 0.1163, lr_0 = 2.5394e-04
Loss = 3.7107e-03, PNorm = 168.9591, GNorm = 0.1982, lr_0 = 2.5377e-04
Loss = 3.8971e-03, PNorm = 168.9690, GNorm = 0.1711, lr_0 = 2.5359e-04
Loss = 2.7682e-03, PNorm = 168.9768, GNorm = 0.2500, lr_0 = 2.5342e-04
Loss = 3.9321e-03, PNorm = 168.9871, GNorm = 0.1448, lr_0 = 2.5325e-04
Loss = 4.6710e-03, PNorm = 168.9952, GNorm = 0.0596, lr_0 = 2.5307e-04
Loss = 3.8310e-03, PNorm = 169.0011, GNorm = 0.4569, lr_0 = 2.5290e-04
Loss = 5.0089e-03, PNorm = 169.0091, GNorm = 0.1704, lr_0 = 2.5273e-04
Loss = 5.1799e-03, PNorm = 169.0177, GNorm = 0.0843, lr_0 = 2.5255e-04
Loss = 3.5389e-03, PNorm = 169.0268, GNorm = 0.1814, lr_0 = 2.5238e-04
Loss = 3.5919e-03, PNorm = 169.0342, GNorm = 0.2653, lr_0 = 2.5221e-04
Loss = 3.7565e-03, PNorm = 169.0417, GNorm = 0.2935, lr_0 = 2.5203e-04
Loss = 3.4537e-03, PNorm = 169.0507, GNorm = 0.0935, lr_0 = 2.5186e-04
Loss = 3.8307e-03, PNorm = 169.0602, GNorm = 0.0822, lr_0 = 2.5169e-04
Loss = 3.6230e-03, PNorm = 169.0709, GNorm = 0.2685, lr_0 = 2.5152e-04
Loss = 5.8901e-03, PNorm = 169.0779, GNorm = 0.1383, lr_0 = 2.5134e-04
Loss = 5.1477e-03, PNorm = 169.0858, GNorm = 0.3528, lr_0 = 2.5117e-04
Loss = 3.7658e-03, PNorm = 169.0915, GNorm = 0.2366, lr_0 = 2.5100e-04
Loss = 3.0757e-03, PNorm = 169.1007, GNorm = 0.1305, lr_0 = 2.5083e-04
Loss = 3.2173e-03, PNorm = 169.1081, GNorm = 0.0790, lr_0 = 2.5066e-04
Loss = 3.5362e-03, PNorm = 169.1132, GNorm = 0.2984, lr_0 = 2.5048e-04
Loss = 4.4614e-03, PNorm = 169.1193, GNorm = 0.1724, lr_0 = 2.5031e-04
Loss = 3.0595e-03, PNorm = 169.1252, GNorm = 0.1967, lr_0 = 2.5014e-04
Loss = 3.1389e-03, PNorm = 169.1317, GNorm = 0.0984, lr_0 = 2.4997e-04
Loss = 2.9996e-03, PNorm = 169.1409, GNorm = 0.1552, lr_0 = 2.4980e-04
Loss = 4.0027e-03, PNorm = 169.1479, GNorm = 0.2417, lr_0 = 2.4963e-04
Loss = 4.6759e-03, PNorm = 169.1571, GNorm = 0.1146, lr_0 = 2.4946e-04
Loss = 4.6068e-03, PNorm = 169.1661, GNorm = 0.3702, lr_0 = 2.4929e-04
Loss = 4.5570e-03, PNorm = 169.1747, GNorm = 0.3260, lr_0 = 2.4911e-04
Loss = 5.0788e-03, PNorm = 169.1825, GNorm = 0.0837, lr_0 = 2.4894e-04
Loss = 3.5455e-03, PNorm = 169.1893, GNorm = 0.1618, lr_0 = 2.4877e-04
Loss = 6.3075e-03, PNorm = 169.2005, GNorm = 0.1517, lr_0 = 2.4860e-04
Loss = 2.7953e-03, PNorm = 169.2104, GNorm = 0.2663, lr_0 = 2.4843e-04
Loss = 3.2544e-03, PNorm = 169.2167, GNorm = 0.1335, lr_0 = 2.4826e-04
Loss = 4.0301e-03, PNorm = 169.2238, GNorm = 0.0826, lr_0 = 2.4809e-04
Loss = 3.5399e-03, PNorm = 169.2304, GNorm = 0.1726, lr_0 = 2.4792e-04
Loss = 3.9700e-03, PNorm = 169.2385, GNorm = 0.0769, lr_0 = 2.4775e-04
Loss = 3.8487e-03, PNorm = 169.2477, GNorm = 0.1902, lr_0 = 2.4758e-04
Loss = 2.8922e-03, PNorm = 169.2545, GNorm = 0.3983, lr_0 = 2.4741e-04
Loss = 3.7405e-03, PNorm = 169.2621, GNorm = 0.1392, lr_0 = 2.4724e-04
Loss = 3.4251e-03, PNorm = 169.2695, GNorm = 0.3714, lr_0 = 2.4707e-04
Validation mae = 0.278477
Epoch 19
Loss = 3.4153e-03, PNorm = 169.2757, GNorm = 0.2247, lr_0 = 2.4690e-04
Loss = 4.3772e-03, PNorm = 169.2809, GNorm = 0.1705, lr_0 = 2.4674e-04
Loss = 4.5270e-03, PNorm = 169.2870, GNorm = 0.1567, lr_0 = 2.4657e-04
Loss = 3.1886e-03, PNorm = 169.2925, GNorm = 0.1209, lr_0 = 2.4640e-04
Loss = 3.3568e-03, PNorm = 169.3001, GNorm = 0.2015, lr_0 = 2.4623e-04
Loss = 3.9982e-03, PNorm = 169.3065, GNorm = 0.2540, lr_0 = 2.4606e-04
Loss = 5.0833e-03, PNorm = 169.3113, GNorm = 0.1890, lr_0 = 2.4589e-04
Loss = 4.0834e-03, PNorm = 169.3172, GNorm = 0.1305, lr_0 = 2.4572e-04
Loss = 3.0584e-03, PNorm = 169.3221, GNorm = 0.1534, lr_0 = 2.4556e-04
Loss = 2.4460e-03, PNorm = 169.3283, GNorm = 0.2600, lr_0 = 2.4539e-04
Loss = 2.7013e-03, PNorm = 169.3324, GNorm = 0.1832, lr_0 = 2.4522e-04
Loss = 2.6593e-03, PNorm = 169.3364, GNorm = 0.0957, lr_0 = 2.4505e-04
Loss = 3.3585e-03, PNorm = 169.3422, GNorm = 0.3060, lr_0 = 2.4488e-04
Loss = 4.1781e-03, PNorm = 169.3472, GNorm = 0.1998, lr_0 = 2.4472e-04
Loss = 3.5896e-03, PNorm = 169.3560, GNorm = 0.0794, lr_0 = 2.4455e-04
Loss = 2.9876e-03, PNorm = 169.3625, GNorm = 0.0628, lr_0 = 2.4438e-04
Loss = 3.9329e-03, PNorm = 169.3680, GNorm = 0.1622, lr_0 = 2.4421e-04
Loss = 2.8276e-03, PNorm = 169.3721, GNorm = 0.1753, lr_0 = 2.4405e-04
Loss = 2.4726e-03, PNorm = 169.3765, GNorm = 0.0697, lr_0 = 2.4388e-04
Loss = 3.4547e-03, PNorm = 169.3799, GNorm = 0.3462, lr_0 = 2.4371e-04
Loss = 2.6336e-03, PNorm = 169.3844, GNorm = 0.1595, lr_0 = 2.4354e-04
Loss = 3.9283e-03, PNorm = 169.3915, GNorm = 0.0653, lr_0 = 2.4338e-04
Loss = 3.5000e-03, PNorm = 169.3990, GNorm = 0.2486, lr_0 = 2.4321e-04
Loss = 2.6617e-03, PNorm = 169.4047, GNorm = 0.2124, lr_0 = 2.4304e-04
Loss = 3.2880e-03, PNorm = 169.4139, GNorm = 0.2909, lr_0 = 2.4288e-04
Loss = 2.5534e-03, PNorm = 169.4202, GNorm = 0.1587, lr_0 = 2.4271e-04
Loss = 3.8117e-03, PNorm = 169.4233, GNorm = 0.1667, lr_0 = 2.4254e-04
Loss = 2.4664e-03, PNorm = 169.4275, GNorm = 0.2159, lr_0 = 2.4238e-04
Loss = 3.1290e-03, PNorm = 169.4323, GNorm = 0.0501, lr_0 = 2.4221e-04
Loss = 4.4026e-03, PNorm = 169.4395, GNorm = 0.1612, lr_0 = 2.4205e-04
Loss = 2.9601e-03, PNorm = 169.4463, GNorm = 0.1415, lr_0 = 2.4188e-04
Loss = 3.0213e-03, PNorm = 169.4513, GNorm = 0.1259, lr_0 = 2.4171e-04
Loss = 2.9202e-03, PNorm = 169.4560, GNorm = 0.0704, lr_0 = 2.4155e-04
Loss = 4.5049e-03, PNorm = 169.4596, GNorm = 0.0926, lr_0 = 2.4138e-04
Loss = 3.8575e-03, PNorm = 169.4647, GNorm = 0.1339, lr_0 = 2.4122e-04
Loss = 3.2847e-03, PNorm = 169.4722, GNorm = 0.0706, lr_0 = 2.4105e-04
Loss = 4.5264e-03, PNorm = 169.4789, GNorm = 0.1116, lr_0 = 2.4089e-04
Loss = 3.1969e-03, PNorm = 169.4877, GNorm = 0.1960, lr_0 = 2.4072e-04
Loss = 3.0048e-03, PNorm = 169.4960, GNorm = 0.0755, lr_0 = 2.4056e-04
Loss = 2.8023e-03, PNorm = 169.5024, GNorm = 0.1165, lr_0 = 2.4039e-04
Loss = 7.0642e-03, PNorm = 169.5076, GNorm = 0.4057, lr_0 = 2.4023e-04
Loss = 2.8888e-03, PNorm = 169.5118, GNorm = 0.1024, lr_0 = 2.4006e-04
Loss = 4.1554e-03, PNorm = 169.5183, GNorm = 0.1111, lr_0 = 2.3990e-04
Loss = 2.9412e-03, PNorm = 169.5224, GNorm = 0.0655, lr_0 = 2.3974e-04
Loss = 3.8321e-03, PNorm = 169.5296, GNorm = 0.1949, lr_0 = 2.3957e-04
Loss = 2.6821e-03, PNorm = 169.5330, GNorm = 0.3161, lr_0 = 2.3941e-04
Loss = 3.1643e-03, PNorm = 169.5384, GNorm = 0.0860, lr_0 = 2.3924e-04
Loss = 3.0899e-03, PNorm = 169.5459, GNorm = 0.1766, lr_0 = 2.3908e-04
Loss = 3.9897e-03, PNorm = 169.5556, GNorm = 0.1111, lr_0 = 2.3892e-04
Loss = 2.8967e-03, PNorm = 169.5594, GNorm = 0.2034, lr_0 = 2.3875e-04
Loss = 6.3269e-03, PNorm = 169.5624, GNorm = 0.1877, lr_0 = 2.3859e-04
Loss = 3.4603e-03, PNorm = 169.5693, GNorm = 0.0854, lr_0 = 2.3842e-04
Loss = 2.2023e-03, PNorm = 169.5744, GNorm = 0.0477, lr_0 = 2.3826e-04
Loss = 2.6623e-03, PNorm = 169.5810, GNorm = 0.1352, lr_0 = 2.3810e-04
Loss = 3.5709e-03, PNorm = 169.5843, GNorm = 0.1118, lr_0 = 2.3794e-04
Loss = 2.5224e-03, PNorm = 169.5900, GNorm = 0.1510, lr_0 = 2.3777e-04
Loss = 3.4231e-03, PNorm = 169.5944, GNorm = 0.2806, lr_0 = 2.3761e-04
Loss = 3.4884e-03, PNorm = 169.6010, GNorm = 0.2788, lr_0 = 2.3745e-04
Loss = 4.9860e-03, PNorm = 169.6090, GNorm = 0.2208, lr_0 = 2.3728e-04
Loss = 4.7514e-03, PNorm = 169.6158, GNorm = 0.3000, lr_0 = 2.3712e-04
Loss = 3.5603e-03, PNorm = 169.6230, GNorm = 0.1652, lr_0 = 2.3696e-04
Loss = 2.5993e-03, PNorm = 169.6314, GNorm = 0.1326, lr_0 = 2.3680e-04
Loss = 4.4544e-03, PNorm = 169.6371, GNorm = 0.1966, lr_0 = 2.3663e-04
Loss = 3.3789e-03, PNorm = 169.6414, GNorm = 0.5251, lr_0 = 2.3647e-04
Loss = 4.6582e-03, PNorm = 169.6439, GNorm = 0.2098, lr_0 = 2.3631e-04
Loss = 3.2511e-03, PNorm = 169.6498, GNorm = 0.1331, lr_0 = 2.3615e-04
Loss = 2.6814e-03, PNorm = 169.6569, GNorm = 0.3034, lr_0 = 2.3599e-04
Loss = 2.8125e-03, PNorm = 169.6660, GNorm = 0.1223, lr_0 = 2.3582e-04
Loss = 3.4763e-03, PNorm = 169.6744, GNorm = 0.1126, lr_0 = 2.3566e-04
Loss = 3.3183e-03, PNorm = 169.6812, GNorm = 0.4995, lr_0 = 2.3550e-04
Loss = 3.2194e-03, PNorm = 169.6859, GNorm = 0.1834, lr_0 = 2.3534e-04
Loss = 4.8932e-03, PNorm = 169.6955, GNorm = 0.4477, lr_0 = 2.3518e-04
Loss = 2.7320e-03, PNorm = 169.7029, GNorm = 0.0962, lr_0 = 2.3502e-04
Loss = 3.4561e-03, PNorm = 169.7081, GNorm = 0.0820, lr_0 = 2.3486e-04
Loss = 3.5736e-03, PNorm = 169.7169, GNorm = 0.3351, lr_0 = 2.3470e-04
Loss = 3.6078e-03, PNorm = 169.7229, GNorm = 0.2841, lr_0 = 2.3454e-04
Loss = 2.4088e-03, PNorm = 169.7281, GNorm = 0.0546, lr_0 = 2.3437e-04
Loss = 3.2035e-03, PNorm = 169.7331, GNorm = 0.0676, lr_0 = 2.3421e-04
Loss = 2.7526e-03, PNorm = 169.7378, GNorm = 0.2551, lr_0 = 2.3405e-04
Loss = 3.7768e-03, PNorm = 169.7452, GNorm = 0.1530, lr_0 = 2.3389e-04
Loss = 3.8813e-03, PNorm = 169.7531, GNorm = 0.2019, lr_0 = 2.3373e-04
Loss = 7.4327e-03, PNorm = 169.7594, GNorm = 1.0656, lr_0 = 2.3357e-04
Loss = 2.9614e-03, PNorm = 169.7661, GNorm = 0.1192, lr_0 = 2.3341e-04
Loss = 2.8979e-03, PNorm = 169.7775, GNorm = 0.2340, lr_0 = 2.3325e-04
Loss = 3.1989e-03, PNorm = 169.7848, GNorm = 0.1781, lr_0 = 2.3309e-04
Loss = 3.4331e-03, PNorm = 169.7896, GNorm = 0.3235, lr_0 = 2.3293e-04
Loss = 3.9992e-03, PNorm = 169.7944, GNorm = 0.1191, lr_0 = 2.3277e-04
Loss = 2.6825e-03, PNorm = 169.8005, GNorm = 0.1638, lr_0 = 2.3261e-04
Loss = 2.8667e-03, PNorm = 169.8091, GNorm = 0.1076, lr_0 = 2.3246e-04
Loss = 3.4299e-03, PNorm = 169.8150, GNorm = 0.1262, lr_0 = 2.3230e-04
Loss = 5.9634e-03, PNorm = 169.8213, GNorm = 0.1880, lr_0 = 2.3214e-04
Loss = 2.9645e-03, PNorm = 169.8261, GNorm = 0.0910, lr_0 = 2.3198e-04
Loss = 3.8105e-03, PNorm = 169.8312, GNorm = 0.2112, lr_0 = 2.3182e-04
Loss = 2.8560e-03, PNorm = 169.8389, GNorm = 0.0570, lr_0 = 2.3166e-04
Loss = 2.6739e-03, PNorm = 169.8476, GNorm = 0.1824, lr_0 = 2.3150e-04
Loss = 2.7126e-03, PNorm = 169.8551, GNorm = 0.0769, lr_0 = 2.3134e-04
Loss = 3.0014e-03, PNorm = 169.8625, GNorm = 0.1457, lr_0 = 2.3118e-04
Loss = 4.1840e-03, PNorm = 169.8680, GNorm = 0.1831, lr_0 = 2.3103e-04
Loss = 3.0918e-03, PNorm = 169.8754, GNorm = 0.1072, lr_0 = 2.3087e-04
Loss = 3.3909e-03, PNorm = 169.8830, GNorm = 0.3407, lr_0 = 2.3071e-04
Loss = 2.3619e-03, PNorm = 169.8867, GNorm = 0.0669, lr_0 = 2.3055e-04
Loss = 3.6131e-03, PNorm = 169.8922, GNorm = 0.1991, lr_0 = 2.3039e-04
Loss = 5.4269e-03, PNorm = 169.9007, GNorm = 0.1448, lr_0 = 2.3024e-04
Loss = 3.5986e-03, PNorm = 169.9081, GNorm = 0.2084, lr_0 = 2.3008e-04
Loss = 2.9096e-03, PNorm = 169.9141, GNorm = 0.5544, lr_0 = 2.2992e-04
Loss = 2.7146e-03, PNorm = 169.9189, GNorm = 0.1144, lr_0 = 2.2976e-04
Loss = 4.5315e-03, PNorm = 169.9270, GNorm = 0.1008, lr_0 = 2.2961e-04
Loss = 3.6281e-03, PNorm = 169.9346, GNorm = 0.1010, lr_0 = 2.2945e-04
Loss = 3.6908e-03, PNorm = 169.9417, GNorm = 0.2593, lr_0 = 2.2929e-04
Loss = 2.2085e-03, PNorm = 169.9473, GNorm = 0.0834, lr_0 = 2.2913e-04
Loss = 3.1652e-03, PNorm = 169.9527, GNorm = 0.2468, lr_0 = 2.2898e-04
Loss = 2.8468e-03, PNorm = 169.9593, GNorm = 0.0717, lr_0 = 2.2882e-04
Loss = 3.5915e-03, PNorm = 169.9670, GNorm = 0.1433, lr_0 = 2.2866e-04
Loss = 2.6703e-03, PNorm = 169.9728, GNorm = 0.2484, lr_0 = 2.2851e-04
Loss = 2.6738e-03, PNorm = 169.9775, GNorm = 0.1589, lr_0 = 2.2835e-04
Loss = 4.1251e-03, PNorm = 169.9857, GNorm = 0.2952, lr_0 = 2.2819e-04
Loss = 2.6930e-03, PNorm = 169.9924, GNorm = 0.0664, lr_0 = 2.2804e-04
Loss = 2.8145e-03, PNorm = 169.9972, GNorm = 0.0684, lr_0 = 2.2788e-04
Loss = 3.4111e-03, PNorm = 169.9998, GNorm = 0.2498, lr_0 = 2.2773e-04
Loss = 2.3644e-03, PNorm = 170.0047, GNorm = 0.3043, lr_0 = 2.2757e-04
Validation mae = 0.277628
Epoch 20
Loss = 2.6598e-03, PNorm = 170.0093, GNorm = 0.0801, lr_0 = 2.2741e-04
Loss = 2.9288e-03, PNorm = 170.0137, GNorm = 0.0911, lr_0 = 2.2726e-04
Loss = 3.0105e-03, PNorm = 170.0174, GNorm = 0.2483, lr_0 = 2.2710e-04
Loss = 2.5560e-03, PNorm = 170.0215, GNorm = 0.1467, lr_0 = 2.2695e-04
Loss = 2.3180e-03, PNorm = 170.0239, GNorm = 0.1282, lr_0 = 2.2679e-04
Loss = 2.2778e-03, PNorm = 170.0281, GNorm = 0.1490, lr_0 = 2.2664e-04
Loss = 2.4804e-03, PNorm = 170.0316, GNorm = 0.1052, lr_0 = 2.2648e-04
Loss = 2.9410e-03, PNorm = 170.0347, GNorm = 0.1738, lr_0 = 2.2632e-04
Loss = 2.6953e-03, PNorm = 170.0384, GNorm = 0.1383, lr_0 = 2.2617e-04
Loss = 2.4568e-03, PNorm = 170.0425, GNorm = 0.0932, lr_0 = 2.2601e-04
Loss = 2.5773e-03, PNorm = 170.0483, GNorm = 0.1106, lr_0 = 2.2586e-04
Loss = 2.3210e-03, PNorm = 170.0521, GNorm = 0.0634, lr_0 = 2.2571e-04
Loss = 3.1576e-03, PNorm = 170.0551, GNorm = 0.2186, lr_0 = 2.2555e-04
Loss = 2.5668e-03, PNorm = 170.0577, GNorm = 0.1190, lr_0 = 2.2540e-04
Loss = 1.9535e-03, PNorm = 170.0605, GNorm = 0.2420, lr_0 = 2.2524e-04
Loss = 2.4941e-03, PNorm = 170.0635, GNorm = 0.1404, lr_0 = 2.2509e-04
Loss = 2.8699e-03, PNorm = 170.0717, GNorm = 0.1624, lr_0 = 2.2493e-04
Loss = 3.5831e-03, PNorm = 170.0776, GNorm = 0.2833, lr_0 = 2.2478e-04
Loss = 2.9265e-03, PNorm = 170.0836, GNorm = 0.3333, lr_0 = 2.2463e-04
Loss = 2.2158e-03, PNorm = 170.0889, GNorm = 0.0877, lr_0 = 2.2447e-04
Loss = 2.6588e-03, PNorm = 170.0934, GNorm = 0.2746, lr_0 = 2.2432e-04
Loss = 2.2846e-03, PNorm = 170.1000, GNorm = 0.2216, lr_0 = 2.2416e-04
Loss = 3.7335e-03, PNorm = 170.1045, GNorm = 0.2159, lr_0 = 2.2401e-04
Loss = 2.7111e-03, PNorm = 170.1095, GNorm = 0.2066, lr_0 = 2.2386e-04
Loss = 2.7830e-03, PNorm = 170.1147, GNorm = 0.1769, lr_0 = 2.2370e-04
Loss = 4.6224e-03, PNorm = 170.1179, GNorm = 0.3050, lr_0 = 2.2355e-04
Loss = 2.0847e-03, PNorm = 170.1228, GNorm = 0.1304, lr_0 = 2.2340e-04
Loss = 2.2947e-03, PNorm = 170.1262, GNorm = 0.1858, lr_0 = 2.2324e-04
Loss = 4.3411e-03, PNorm = 170.1296, GNorm = 0.1777, lr_0 = 2.2309e-04
Loss = 3.9452e-03, PNorm = 170.1347, GNorm = 0.1830, lr_0 = 2.2294e-04
Loss = 3.6667e-03, PNorm = 170.1404, GNorm = 0.1100, lr_0 = 2.2279e-04
Loss = 2.3209e-03, PNorm = 170.1446, GNorm = 0.0854, lr_0 = 2.2263e-04
Loss = 4.4655e-03, PNorm = 170.1513, GNorm = 0.1167, lr_0 = 2.2248e-04
Loss = 2.5008e-03, PNorm = 170.1574, GNorm = 0.1251, lr_0 = 2.2233e-04
Loss = 2.2262e-03, PNorm = 170.1602, GNorm = 0.1724, lr_0 = 2.2218e-04
Loss = 2.6253e-03, PNorm = 170.1648, GNorm = 0.3566, lr_0 = 2.2202e-04
Loss = 2.7741e-03, PNorm = 170.1710, GNorm = 0.1559, lr_0 = 2.2187e-04
Loss = 2.3458e-03, PNorm = 170.1753, GNorm = 0.0790, lr_0 = 2.2172e-04
Loss = 2.1670e-03, PNorm = 170.1817, GNorm = 0.0899, lr_0 = 2.2157e-04
Loss = 2.0603e-03, PNorm = 170.1863, GNorm = 0.0866, lr_0 = 2.2142e-04
Loss = 2.3818e-03, PNorm = 170.1917, GNorm = 0.2153, lr_0 = 2.2126e-04
Loss = 3.1995e-03, PNorm = 170.1968, GNorm = 0.0517, lr_0 = 2.2111e-04
Loss = 2.1131e-03, PNorm = 170.2015, GNorm = 0.3274, lr_0 = 2.2096e-04
Loss = 2.5778e-03, PNorm = 170.2071, GNorm = 0.1704, lr_0 = 2.2081e-04
Loss = 2.1772e-03, PNorm = 170.2126, GNorm = 0.2176, lr_0 = 2.2066e-04
Loss = 3.2902e-03, PNorm = 170.2186, GNorm = 0.1757, lr_0 = 2.2051e-04
Loss = 2.3101e-03, PNorm = 170.2244, GNorm = 0.0974, lr_0 = 2.2036e-04
Loss = 3.1156e-03, PNorm = 170.2283, GNorm = 0.0712, lr_0 = 2.2021e-04
Loss = 3.3331e-03, PNorm = 170.2318, GNorm = 0.0467, lr_0 = 2.2005e-04
Loss = 2.8456e-03, PNorm = 170.2359, GNorm = 0.1710, lr_0 = 2.1990e-04
Loss = 3.3122e-03, PNorm = 170.2388, GNorm = 0.0862, lr_0 = 2.1975e-04
Loss = 2.8206e-03, PNorm = 170.2424, GNorm = 0.1313, lr_0 = 2.1960e-04
Loss = 3.3535e-03, PNorm = 170.2465, GNorm = 0.2505, lr_0 = 2.1945e-04
Loss = 5.8023e-03, PNorm = 170.2543, GNorm = 0.1122, lr_0 = 2.1930e-04
Loss = 2.5359e-03, PNorm = 170.2626, GNorm = 0.1395, lr_0 = 2.1915e-04
Loss = 2.0467e-03, PNorm = 170.2697, GNorm = 0.1507, lr_0 = 2.1900e-04
Loss = 2.4749e-03, PNorm = 170.2765, GNorm = 0.2050, lr_0 = 2.1885e-04
Loss = 2.0725e-03, PNorm = 170.2824, GNorm = 0.2175, lr_0 = 2.1870e-04
Loss = 3.0442e-03, PNorm = 170.2874, GNorm = 0.2756, lr_0 = 2.1855e-04
Loss = 2.3967e-03, PNorm = 170.2934, GNorm = 0.1421, lr_0 = 2.1840e-04
Loss = 2.2785e-03, PNorm = 170.2995, GNorm = 0.2923, lr_0 = 2.1825e-04
Loss = 2.9612e-03, PNorm = 170.3010, GNorm = 0.1746, lr_0 = 2.1810e-04
Loss = 4.2513e-03, PNorm = 170.3071, GNorm = 0.1175, lr_0 = 2.1795e-04
Loss = 6.5064e-03, PNorm = 170.3119, GNorm = 0.1183, lr_0 = 2.1780e-04
Loss = 3.2938e-03, PNorm = 170.3181, GNorm = 0.1959, lr_0 = 2.1765e-04
Loss = 5.3290e-03, PNorm = 170.3241, GNorm = 0.0640, lr_0 = 2.1751e-04
Loss = 4.1846e-03, PNorm = 170.3263, GNorm = 0.2148, lr_0 = 2.1736e-04
Loss = 3.2178e-03, PNorm = 170.3314, GNorm = 0.0526, lr_0 = 2.1721e-04
Loss = 3.1450e-03, PNorm = 170.3381, GNorm = 0.2759, lr_0 = 2.1706e-04
Loss = 2.3129e-03, PNorm = 170.3417, GNorm = 0.3194, lr_0 = 2.1691e-04
Loss = 3.1470e-03, PNorm = 170.3459, GNorm = 0.2672, lr_0 = 2.1676e-04
Loss = 3.5389e-03, PNorm = 170.3517, GNorm = 0.1251, lr_0 = 2.1661e-04
Loss = 3.7283e-03, PNorm = 170.3565, GNorm = 0.1279, lr_0 = 2.1646e-04
Loss = 2.0853e-03, PNorm = 170.3603, GNorm = 0.3210, lr_0 = 2.1632e-04
Loss = 3.0423e-03, PNorm = 170.3625, GNorm = 0.0720, lr_0 = 2.1617e-04
Loss = 2.4572e-03, PNorm = 170.3672, GNorm = 0.0730, lr_0 = 2.1602e-04
Loss = 2.0964e-03, PNorm = 170.3739, GNorm = 0.0971, lr_0 = 2.1587e-04
Loss = 2.4257e-03, PNorm = 170.3787, GNorm = 0.4424, lr_0 = 2.1572e-04
Loss = 3.6989e-03, PNorm = 170.3857, GNorm = 0.2888, lr_0 = 2.1558e-04
Loss = 2.4217e-03, PNorm = 170.3899, GNorm = 0.1016, lr_0 = 2.1543e-04
Loss = 3.7908e-03, PNorm = 170.3974, GNorm = 0.1430, lr_0 = 2.1528e-04
Loss = 3.1723e-03, PNorm = 170.4037, GNorm = 0.5041, lr_0 = 2.1513e-04
Loss = 2.8458e-03, PNorm = 170.4089, GNorm = 0.2005, lr_0 = 2.1499e-04
Loss = 2.9966e-03, PNorm = 170.4144, GNorm = 0.1764, lr_0 = 2.1484e-04
Loss = 3.7835e-03, PNorm = 170.4176, GNorm = 0.0536, lr_0 = 2.1469e-04
Loss = 5.0571e-03, PNorm = 170.4196, GNorm = 0.1472, lr_0 = 2.1454e-04
Loss = 4.7824e-03, PNorm = 170.4252, GNorm = 0.0955, lr_0 = 2.1440e-04
Loss = 2.8333e-03, PNorm = 170.4339, GNorm = 0.1052, lr_0 = 2.1425e-04
Loss = 2.4855e-03, PNorm = 170.4395, GNorm = 0.1381, lr_0 = 2.1410e-04
Loss = 2.4210e-03, PNorm = 170.4443, GNorm = 0.0988, lr_0 = 2.1396e-04
Loss = 3.1798e-03, PNorm = 170.4485, GNorm = 0.1730, lr_0 = 2.1381e-04
Loss = 2.1372e-03, PNorm = 170.4560, GNorm = 0.1222, lr_0 = 2.1366e-04
Loss = 5.3684e-03, PNorm = 170.4627, GNorm = 0.8986, lr_0 = 2.1352e-04
Loss = 2.4391e-03, PNorm = 170.4684, GNorm = 0.3684, lr_0 = 2.1337e-04
Loss = 2.4202e-03, PNorm = 170.4747, GNorm = 0.2664, lr_0 = 2.1323e-04
Loss = 3.4777e-03, PNorm = 170.4812, GNorm = 0.0676, lr_0 = 2.1308e-04
Loss = 2.6119e-03, PNorm = 170.4878, GNorm = 0.0817, lr_0 = 2.1293e-04
Loss = 2.1290e-03, PNorm = 170.4934, GNorm = 0.1038, lr_0 = 2.1279e-04
Loss = 2.0968e-03, PNorm = 170.4976, GNorm = 0.0889, lr_0 = 2.1264e-04
Loss = 2.2690e-03, PNorm = 170.5037, GNorm = 0.1180, lr_0 = 2.1250e-04
Loss = 4.5796e-03, PNorm = 170.5078, GNorm = 0.0642, lr_0 = 2.1235e-04
Loss = 3.9219e-03, PNorm = 170.5135, GNorm = 0.1124, lr_0 = 2.1221e-04
Loss = 3.8731e-03, PNorm = 170.5180, GNorm = 0.2565, lr_0 = 2.1206e-04
Loss = 3.3366e-03, PNorm = 170.5243, GNorm = 0.2211, lr_0 = 2.1191e-04
Loss = 2.5754e-03, PNorm = 170.5286, GNorm = 0.2938, lr_0 = 2.1177e-04
Loss = 7.0630e-03, PNorm = 170.5339, GNorm = 0.3910, lr_0 = 2.1162e-04
Loss = 3.3052e-03, PNorm = 170.5405, GNorm = 0.1905, lr_0 = 2.1148e-04
Loss = 4.1473e-03, PNorm = 170.5465, GNorm = 0.1380, lr_0 = 2.1133e-04
Loss = 2.9288e-03, PNorm = 170.5515, GNorm = 0.1573, lr_0 = 2.1119e-04
Loss = 4.2139e-03, PNorm = 170.5547, GNorm = 0.1608, lr_0 = 2.1104e-04
Loss = 3.2570e-03, PNorm = 170.5616, GNorm = 0.1894, lr_0 = 2.1090e-04
Loss = 2.7429e-03, PNorm = 170.5681, GNorm = 0.1919, lr_0 = 2.1076e-04
Loss = 2.4608e-03, PNorm = 170.5744, GNorm = 0.1841, lr_0 = 2.1061e-04
Loss = 2.3160e-03, PNorm = 170.5790, GNorm = 0.1727, lr_0 = 2.1047e-04
Loss = 3.1059e-03, PNorm = 170.5844, GNorm = 0.2247, lr_0 = 2.1032e-04
Loss = 3.1388e-03, PNorm = 170.5900, GNorm = 0.2001, lr_0 = 2.1018e-04
Loss = 2.4170e-03, PNorm = 170.5960, GNorm = 0.1143, lr_0 = 2.1003e-04
Loss = 2.6457e-03, PNorm = 170.6018, GNorm = 0.0506, lr_0 = 2.0989e-04
Loss = 5.0761e-03, PNorm = 170.6069, GNorm = 0.2101, lr_0 = 2.0975e-04
Loss = 2.9528e-03, PNorm = 170.6133, GNorm = 0.1376, lr_0 = 2.0960e-04
Validation mae = 0.278198
Epoch 21
Loss = 3.8768e-03, PNorm = 170.6174, GNorm = 0.0982, lr_0 = 2.0946e-04
Loss = 3.1218e-03, PNorm = 170.6199, GNorm = 0.1829, lr_0 = 2.0932e-04
Loss = 1.7411e-03, PNorm = 170.6234, GNorm = 0.0594, lr_0 = 2.0917e-04
Loss = 3.1913e-03, PNorm = 170.6273, GNorm = 0.1400, lr_0 = 2.0903e-04
Loss = 2.7325e-03, PNorm = 170.6292, GNorm = 0.1051, lr_0 = 2.0889e-04
Loss = 2.3945e-03, PNorm = 170.6326, GNorm = 0.3211, lr_0 = 2.0874e-04
Loss = 1.7463e-03, PNorm = 170.6388, GNorm = 0.1279, lr_0 = 2.0860e-04
Loss = 2.2787e-03, PNorm = 170.6428, GNorm = 0.0703, lr_0 = 2.0846e-04
Loss = 2.4786e-03, PNorm = 170.6474, GNorm = 0.1605, lr_0 = 2.0831e-04
Loss = 3.0258e-03, PNorm = 170.6494, GNorm = 0.1327, lr_0 = 2.0817e-04
Loss = 2.2560e-03, PNorm = 170.6513, GNorm = 0.1697, lr_0 = 2.0803e-04
Loss = 2.0215e-03, PNorm = 170.6545, GNorm = 0.0641, lr_0 = 2.0789e-04
Loss = 2.2010e-03, PNorm = 170.6584, GNorm = 0.1559, lr_0 = 2.0774e-04
Loss = 2.5515e-03, PNorm = 170.6603, GNorm = 0.4429, lr_0 = 2.0760e-04
Loss = 2.6414e-03, PNorm = 170.6663, GNorm = 0.1050, lr_0 = 2.0746e-04
Loss = 3.8100e-03, PNorm = 170.6716, GNorm = 0.0772, lr_0 = 2.0732e-04
Loss = 4.4715e-03, PNorm = 170.6751, GNorm = 0.7034, lr_0 = 2.0718e-04
Loss = 2.3129e-03, PNorm = 170.6787, GNorm = 0.1431, lr_0 = 2.0703e-04
Loss = 3.8006e-03, PNorm = 170.6822, GNorm = 0.0902, lr_0 = 2.0689e-04
Loss = 3.6378e-03, PNorm = 170.6856, GNorm = 0.3551, lr_0 = 2.0675e-04
Loss = 2.6634e-03, PNorm = 170.6875, GNorm = 0.3093, lr_0 = 2.0661e-04
Loss = 3.0280e-03, PNorm = 170.6917, GNorm = 0.1235, lr_0 = 2.0647e-04
Loss = 2.4064e-03, PNorm = 170.6945, GNorm = 0.1635, lr_0 = 2.0633e-04
Loss = 2.0364e-03, PNorm = 170.6983, GNorm = 0.0534, lr_0 = 2.0618e-04
Loss = 2.1350e-03, PNorm = 170.7026, GNorm = 0.1501, lr_0 = 2.0604e-04
Loss = 2.1408e-03, PNorm = 170.7078, GNorm = 0.0645, lr_0 = 2.0590e-04
Loss = 1.9941e-03, PNorm = 170.7127, GNorm = 0.2194, lr_0 = 2.0576e-04
Loss = 2.4307e-03, PNorm = 170.7194, GNorm = 0.1678, lr_0 = 2.0562e-04
Loss = 3.6035e-03, PNorm = 170.7242, GNorm = 0.2416, lr_0 = 2.0548e-04
Loss = 3.0420e-03, PNorm = 170.7299, GNorm = 0.0786, lr_0 = 2.0534e-04
Loss = 2.2724e-03, PNorm = 170.7341, GNorm = 0.1635, lr_0 = 2.0520e-04
Loss = 1.8682e-03, PNorm = 170.7407, GNorm = 0.1968, lr_0 = 2.0506e-04
Loss = 2.6393e-03, PNorm = 170.7433, GNorm = 0.0644, lr_0 = 2.0492e-04
Loss = 2.9682e-03, PNorm = 170.7459, GNorm = 0.1210, lr_0 = 2.0478e-04
Loss = 3.1081e-03, PNorm = 170.7491, GNorm = 0.4124, lr_0 = 2.0464e-04
Loss = 1.9552e-03, PNorm = 170.7528, GNorm = 0.1035, lr_0 = 2.0450e-04
Loss = 2.4913e-03, PNorm = 170.7560, GNorm = 0.1481, lr_0 = 2.0436e-04
Loss = 2.6262e-03, PNorm = 170.7618, GNorm = 0.0987, lr_0 = 2.0422e-04
Loss = 2.8206e-03, PNorm = 170.7655, GNorm = 0.0616, lr_0 = 2.0408e-04
Loss = 2.8993e-03, PNorm = 170.7694, GNorm = 0.1932, lr_0 = 2.0394e-04
Loss = 2.1581e-03, PNorm = 170.7749, GNorm = 0.0502, lr_0 = 2.0380e-04
Loss = 1.7397e-03, PNorm = 170.7790, GNorm = 0.1745, lr_0 = 2.0366e-04
Loss = 2.1565e-03, PNorm = 170.7833, GNorm = 0.1244, lr_0 = 2.0352e-04
Loss = 3.3512e-03, PNorm = 170.7878, GNorm = 0.2242, lr_0 = 2.0338e-04
Loss = 2.5585e-03, PNorm = 170.7912, GNorm = 0.0480, lr_0 = 2.0324e-04
Loss = 3.7836e-03, PNorm = 170.7954, GNorm = 0.0779, lr_0 = 2.0310e-04
Loss = 2.4306e-03, PNorm = 170.7990, GNorm = 0.1992, lr_0 = 2.0296e-04
Loss = 2.3983e-03, PNorm = 170.8016, GNorm = 0.3740, lr_0 = 2.0282e-04
Loss = 2.4373e-03, PNorm = 170.8063, GNorm = 0.2382, lr_0 = 2.0268e-04
Loss = 3.4473e-03, PNorm = 170.8105, GNorm = 0.3087, lr_0 = 2.0254e-04
Loss = 3.1222e-03, PNorm = 170.8176, GNorm = 0.0945, lr_0 = 2.0240e-04
Loss = 1.8888e-03, PNorm = 170.8255, GNorm = 0.1237, lr_0 = 2.0227e-04
Loss = 2.0475e-03, PNorm = 170.8308, GNorm = 0.0753, lr_0 = 2.0213e-04
Loss = 3.6544e-03, PNorm = 170.8369, GNorm = 0.1821, lr_0 = 2.0199e-04
Loss = 2.7442e-03, PNorm = 170.8417, GNorm = 0.0989, lr_0 = 2.0185e-04
Loss = 2.0764e-03, PNorm = 170.8455, GNorm = 0.2641, lr_0 = 2.0171e-04
Loss = 2.4077e-03, PNorm = 170.8501, GNorm = 0.0913, lr_0 = 2.0157e-04
Loss = 4.9719e-03, PNorm = 170.8537, GNorm = 0.1314, lr_0 = 2.0144e-04
Loss = 2.0936e-03, PNorm = 170.8579, GNorm = 0.0370, lr_0 = 2.0130e-04
Loss = 2.6092e-03, PNorm = 170.8601, GNorm = 0.1417, lr_0 = 2.0116e-04
Loss = 1.9109e-03, PNorm = 170.8649, GNorm = 0.0833, lr_0 = 2.0102e-04
Loss = 2.1165e-03, PNorm = 170.8709, GNorm = 0.2583, lr_0 = 2.0088e-04
Loss = 2.6251e-03, PNorm = 170.8759, GNorm = 0.2076, lr_0 = 2.0075e-04
Loss = 2.7929e-03, PNorm = 170.8791, GNorm = 0.1678, lr_0 = 2.0061e-04
Loss = 2.3598e-03, PNorm = 170.8817, GNorm = 0.0876, lr_0 = 2.0047e-04
Loss = 3.5911e-03, PNorm = 170.8832, GNorm = 0.3625, lr_0 = 2.0033e-04
Loss = 2.9958e-03, PNorm = 170.8883, GNorm = 0.1841, lr_0 = 2.0020e-04
Loss = 2.1605e-03, PNorm = 170.8923, GNorm = 0.1736, lr_0 = 2.0006e-04
Loss = 1.8638e-03, PNorm = 170.8981, GNorm = 0.1675, lr_0 = 1.9992e-04
Loss = 1.7957e-03, PNorm = 170.9019, GNorm = 0.1592, lr_0 = 1.9979e-04
Loss = 3.8025e-03, PNorm = 170.9057, GNorm = 0.0444, lr_0 = 1.9965e-04
Loss = 2.2845e-03, PNorm = 170.9088, GNorm = 0.0804, lr_0 = 1.9951e-04
Loss = 3.9578e-03, PNorm = 170.9132, GNorm = 0.0905, lr_0 = 1.9938e-04
Loss = 1.8590e-03, PNorm = 170.9184, GNorm = 0.1608, lr_0 = 1.9924e-04
Loss = 4.1430e-03, PNorm = 170.9235, GNorm = 0.1318, lr_0 = 1.9910e-04
Loss = 2.8740e-03, PNorm = 170.9288, GNorm = 0.1058, lr_0 = 1.9897e-04
Loss = 1.6960e-03, PNorm = 170.9334, GNorm = 0.3216, lr_0 = 1.9883e-04
Loss = 1.8465e-03, PNorm = 170.9384, GNorm = 0.0495, lr_0 = 1.9869e-04
Loss = 2.2653e-03, PNorm = 170.9414, GNorm = 0.1245, lr_0 = 1.9856e-04
Loss = 2.3884e-03, PNorm = 170.9453, GNorm = 0.1107, lr_0 = 1.9842e-04
Loss = 2.0554e-03, PNorm = 170.9498, GNorm = 0.2370, lr_0 = 1.9829e-04
Loss = 2.0677e-03, PNorm = 170.9548, GNorm = 0.0530, lr_0 = 1.9815e-04
Loss = 2.4210e-03, PNorm = 170.9599, GNorm = 0.1321, lr_0 = 1.9801e-04
Loss = 3.7502e-03, PNorm = 170.9633, GNorm = 0.2561, lr_0 = 1.9788e-04
Loss = 4.5918e-03, PNorm = 170.9678, GNorm = 0.1634, lr_0 = 1.9774e-04
Loss = 4.4448e-03, PNorm = 170.9742, GNorm = 0.1475, lr_0 = 1.9761e-04
Loss = 1.7753e-03, PNorm = 170.9793, GNorm = 0.0512, lr_0 = 1.9747e-04
Loss = 2.1548e-03, PNorm = 170.9851, GNorm = 0.1339, lr_0 = 1.9734e-04
Loss = 2.2714e-03, PNorm = 170.9901, GNorm = 0.0582, lr_0 = 1.9720e-04
Loss = 1.8239e-03, PNorm = 170.9933, GNorm = 0.1335, lr_0 = 1.9707e-04
Loss = 3.5806e-03, PNorm = 170.9981, GNorm = 0.3049, lr_0 = 1.9693e-04
Loss = 1.7970e-03, PNorm = 171.0029, GNorm = 0.1604, lr_0 = 1.9680e-04
Loss = 4.5758e-03, PNorm = 171.0086, GNorm = 0.1684, lr_0 = 1.9666e-04
Loss = 3.1512e-03, PNorm = 171.0123, GNorm = 0.0591, lr_0 = 1.9653e-04
Loss = 1.7436e-03, PNorm = 171.0181, GNorm = 0.1765, lr_0 = 1.9639e-04
Loss = 2.1808e-03, PNorm = 171.0219, GNorm = 0.1658, lr_0 = 1.9626e-04
Loss = 2.4267e-03, PNorm = 171.0252, GNorm = 0.1552, lr_0 = 1.9612e-04
Loss = 2.3614e-03, PNorm = 171.0292, GNorm = 0.1686, lr_0 = 1.9599e-04
Loss = 2.6757e-03, PNorm = 171.0319, GNorm = 0.1793, lr_0 = 1.9585e-04
Loss = 4.4695e-03, PNorm = 171.0369, GNorm = 0.6053, lr_0 = 1.9572e-04
Loss = 3.1143e-03, PNorm = 171.0428, GNorm = 0.0822, lr_0 = 1.9559e-04
Loss = 2.3138e-03, PNorm = 171.0495, GNorm = 0.0891, lr_0 = 1.9545e-04
Loss = 2.9946e-03, PNorm = 171.0561, GNorm = 0.1571, lr_0 = 1.9532e-04
Loss = 2.3975e-03, PNorm = 171.0626, GNorm = 0.2440, lr_0 = 1.9518e-04
Loss = 2.0329e-03, PNorm = 171.0681, GNorm = 0.2655, lr_0 = 1.9505e-04
Loss = 5.7805e-03, PNorm = 171.0736, GNorm = 0.1760, lr_0 = 1.9492e-04
Loss = 3.7198e-03, PNorm = 171.0789, GNorm = 0.1318, lr_0 = 1.9478e-04
Loss = 5.6151e-03, PNorm = 171.0836, GNorm = 0.4253, lr_0 = 1.9465e-04
Loss = 2.5430e-03, PNorm = 171.0882, GNorm = 0.1991, lr_0 = 1.9452e-04
Loss = 1.7937e-03, PNorm = 171.0901, GNorm = 0.1150, lr_0 = 1.9438e-04
Loss = 2.3431e-03, PNorm = 171.0915, GNorm = 0.0823, lr_0 = 1.9425e-04
Loss = 2.0959e-03, PNorm = 171.0958, GNorm = 0.1834, lr_0 = 1.9412e-04
Loss = 2.7338e-03, PNorm = 171.1016, GNorm = 0.1426, lr_0 = 1.9398e-04
Loss = 2.1066e-03, PNorm = 171.1083, GNorm = 0.0505, lr_0 = 1.9385e-04
Loss = 2.6097e-03, PNorm = 171.1137, GNorm = 0.0548, lr_0 = 1.9372e-04
Loss = 2.1515e-03, PNorm = 171.1193, GNorm = 0.1303, lr_0 = 1.9359e-04
Loss = 2.5331e-03, PNorm = 171.1235, GNorm = 0.2428, lr_0 = 1.9345e-04
Loss = 2.1667e-03, PNorm = 171.1257, GNorm = 0.2134, lr_0 = 1.9332e-04
Loss = 2.1589e-03, PNorm = 171.1318, GNorm = 0.2019, lr_0 = 1.9319e-04
Loss = 2.8869e-03, PNorm = 171.1376, GNorm = 0.1005, lr_0 = 1.9306e-04
Validation mae = 0.277770
Epoch 22
Loss = 2.6834e-03, PNorm = 171.1405, GNorm = 0.1764, lr_0 = 1.9292e-04
Loss = 1.7078e-03, PNorm = 171.1428, GNorm = 0.1351, lr_0 = 1.9279e-04
Loss = 1.8008e-03, PNorm = 171.1456, GNorm = 0.1116, lr_0 = 1.9266e-04
Loss = 2.2207e-03, PNorm = 171.1495, GNorm = 0.1538, lr_0 = 1.9253e-04
Loss = 2.2041e-03, PNorm = 171.1511, GNorm = 0.2362, lr_0 = 1.9240e-04
Loss = 2.3747e-03, PNorm = 171.1531, GNorm = 0.0867, lr_0 = 1.9226e-04
Loss = 1.8054e-03, PNorm = 171.1554, GNorm = 0.0931, lr_0 = 1.9213e-04
Loss = 2.0104e-03, PNorm = 171.1598, GNorm = 0.1849, lr_0 = 1.9200e-04
Loss = 3.2262e-03, PNorm = 171.1635, GNorm = 0.2521, lr_0 = 1.9187e-04
Loss = 2.7172e-03, PNorm = 171.1674, GNorm = 0.0781, lr_0 = 1.9174e-04
Loss = 2.6360e-03, PNorm = 171.1717, GNorm = 0.2338, lr_0 = 1.9161e-04
Loss = 2.1459e-03, PNorm = 171.1743, GNorm = 0.0736, lr_0 = 1.9148e-04
Loss = 1.8476e-03, PNorm = 171.1789, GNorm = 0.0608, lr_0 = 1.9134e-04
Loss = 3.0248e-03, PNorm = 171.1827, GNorm = 0.1153, lr_0 = 1.9121e-04
Loss = 1.7198e-03, PNorm = 171.1879, GNorm = 0.1016, lr_0 = 1.9108e-04
Loss = 2.5665e-03, PNorm = 171.1905, GNorm = 0.0795, lr_0 = 1.9095e-04
Loss = 1.7733e-03, PNorm = 171.1932, GNorm = 0.2435, lr_0 = 1.9082e-04
Loss = 1.7005e-03, PNorm = 171.1976, GNorm = 0.0727, lr_0 = 1.9069e-04
Loss = 1.4988e-03, PNorm = 171.2006, GNorm = 0.1407, lr_0 = 1.9056e-04
Loss = 1.9337e-03, PNorm = 171.2030, GNorm = 0.1691, lr_0 = 1.9043e-04
Loss = 2.5288e-03, PNorm = 171.2050, GNorm = 0.1284, lr_0 = 1.9030e-04
Loss = 1.5903e-03, PNorm = 171.2082, GNorm = 0.1536, lr_0 = 1.9017e-04
Loss = 1.8804e-03, PNorm = 171.2128, GNorm = 0.1217, lr_0 = 1.9004e-04
Loss = 1.5034e-03, PNorm = 171.2162, GNorm = 0.1002, lr_0 = 1.8991e-04
Loss = 1.4951e-03, PNorm = 171.2188, GNorm = 0.0555, lr_0 = 1.8978e-04
Loss = 1.7276e-03, PNorm = 171.2214, GNorm = 0.2122, lr_0 = 1.8965e-04
Loss = 1.4719e-03, PNorm = 171.2247, GNorm = 0.2517, lr_0 = 1.8952e-04
Loss = 3.2428e-03, PNorm = 171.2274, GNorm = 0.1836, lr_0 = 1.8939e-04
Loss = 2.3593e-03, PNorm = 171.2294, GNorm = 0.1161, lr_0 = 1.8926e-04
Loss = 4.5087e-03, PNorm = 171.2318, GNorm = 0.2093, lr_0 = 1.8913e-04
Loss = 2.9065e-03, PNorm = 171.2343, GNorm = 0.1020, lr_0 = 1.8900e-04
Loss = 3.7388e-03, PNorm = 171.2363, GNorm = 0.1712, lr_0 = 1.8887e-04
Loss = 3.3237e-03, PNorm = 171.2411, GNorm = 0.1392, lr_0 = 1.8874e-04
Loss = 1.5211e-03, PNorm = 171.2447, GNorm = 0.0891, lr_0 = 1.8861e-04
Loss = 1.5473e-03, PNorm = 171.2482, GNorm = 0.1072, lr_0 = 1.8848e-04
Loss = 1.9054e-03, PNorm = 171.2518, GNorm = 0.0986, lr_0 = 1.8835e-04
Loss = 1.9005e-03, PNorm = 171.2549, GNorm = 0.0780, lr_0 = 1.8822e-04
Loss = 2.2072e-03, PNorm = 171.2568, GNorm = 0.0828, lr_0 = 1.8809e-04
Loss = 2.1582e-03, PNorm = 171.2617, GNorm = 0.2809, lr_0 = 1.8797e-04
Loss = 2.9149e-03, PNorm = 171.2641, GNorm = 0.2096, lr_0 = 1.8784e-04
Loss = 2.5472e-03, PNorm = 171.2673, GNorm = 0.1630, lr_0 = 1.8771e-04
Loss = 2.2033e-03, PNorm = 171.2703, GNorm = 0.2088, lr_0 = 1.8758e-04
Loss = 2.3192e-03, PNorm = 171.2726, GNorm = 0.0893, lr_0 = 1.8745e-04
Loss = 4.1321e-03, PNorm = 171.2767, GNorm = 0.2897, lr_0 = 1.8732e-04
Loss = 1.8018e-03, PNorm = 171.2800, GNorm = 0.1036, lr_0 = 1.8719e-04
Loss = 2.3217e-03, PNorm = 171.2839, GNorm = 0.1767, lr_0 = 1.8707e-04
Loss = 2.1961e-03, PNorm = 171.2887, GNorm = 0.1223, lr_0 = 1.8694e-04
Loss = 3.4342e-03, PNorm = 171.2924, GNorm = 0.0846, lr_0 = 1.8681e-04
Loss = 1.7584e-03, PNorm = 171.2970, GNorm = 0.0816, lr_0 = 1.8668e-04
Loss = 2.3215e-03, PNorm = 171.3008, GNorm = 0.1181, lr_0 = 1.8655e-04
Loss = 1.5051e-03, PNorm = 171.3055, GNorm = 0.1011, lr_0 = 1.8643e-04
Loss = 3.8990e-03, PNorm = 171.3101, GNorm = 0.1471, lr_0 = 1.8630e-04
Loss = 2.9612e-03, PNorm = 171.3124, GNorm = 0.0452, lr_0 = 1.8617e-04
Loss = 2.0718e-03, PNorm = 171.3160, GNorm = 0.2420, lr_0 = 1.8604e-04
Loss = 1.9460e-03, PNorm = 171.3202, GNorm = 0.0877, lr_0 = 1.8592e-04
Loss = 2.0242e-03, PNorm = 171.3238, GNorm = 0.0762, lr_0 = 1.8579e-04
Loss = 1.9088e-03, PNorm = 171.3290, GNorm = 0.2764, lr_0 = 1.8566e-04
Loss = 2.4689e-03, PNorm = 171.3335, GNorm = 0.0612, lr_0 = 1.8553e-04
Loss = 1.7285e-03, PNorm = 171.3362, GNorm = 0.0707, lr_0 = 1.8541e-04
Loss = 2.4236e-03, PNorm = 171.3392, GNorm = 0.1706, lr_0 = 1.8528e-04
Loss = 1.7563e-03, PNorm = 171.3434, GNorm = 0.1068, lr_0 = 1.8515e-04
Loss = 2.3891e-03, PNorm = 171.3466, GNorm = 0.0886, lr_0 = 1.8503e-04
Loss = 1.5499e-03, PNorm = 171.3503, GNorm = 0.0995, lr_0 = 1.8490e-04
Loss = 2.7414e-03, PNorm = 171.3534, GNorm = 0.1105, lr_0 = 1.8477e-04
Loss = 2.1226e-03, PNorm = 171.3592, GNorm = 0.2135, lr_0 = 1.8465e-04
Loss = 2.3247e-03, PNorm = 171.3623, GNorm = 0.1022, lr_0 = 1.8452e-04
Loss = 2.1536e-03, PNorm = 171.3654, GNorm = 0.0726, lr_0 = 1.8439e-04
Loss = 1.8605e-03, PNorm = 171.3668, GNorm = 0.3590, lr_0 = 1.8427e-04
Loss = 2.9651e-03, PNorm = 171.3683, GNorm = 0.1635, lr_0 = 1.8414e-04
Loss = 3.6305e-03, PNorm = 171.3708, GNorm = 0.1050, lr_0 = 1.8401e-04
Loss = 1.9009e-03, PNorm = 171.3754, GNorm = 0.1416, lr_0 = 1.8389e-04
Loss = 1.8258e-03, PNorm = 171.3789, GNorm = 0.1576, lr_0 = 1.8376e-04
Loss = 2.8546e-03, PNorm = 171.3825, GNorm = 0.1113, lr_0 = 1.8364e-04
Loss = 2.1237e-03, PNorm = 171.3873, GNorm = 0.4469, lr_0 = 1.8351e-04
Loss = 1.8183e-03, PNorm = 171.3929, GNorm = 0.1586, lr_0 = 1.8338e-04
Loss = 1.6956e-03, PNorm = 171.3968, GNorm = 0.0588, lr_0 = 1.8326e-04
Loss = 1.6374e-03, PNorm = 171.4017, GNorm = 0.1230, lr_0 = 1.8313e-04
Loss = 4.1910e-03, PNorm = 171.4072, GNorm = 0.2955, lr_0 = 1.8301e-04
Loss = 2.1232e-03, PNorm = 171.4103, GNorm = 0.0507, lr_0 = 1.8288e-04
Loss = 4.2527e-03, PNorm = 171.4141, GNorm = 0.2270, lr_0 = 1.8276e-04
Loss = 1.8561e-03, PNorm = 171.4185, GNorm = 0.0365, lr_0 = 1.8263e-04
Loss = 2.3187e-03, PNorm = 171.4233, GNorm = 0.3372, lr_0 = 1.8251e-04
Loss = 2.6897e-03, PNorm = 171.4273, GNorm = 0.1394, lr_0 = 1.8238e-04
Loss = 2.1938e-03, PNorm = 171.4321, GNorm = 0.1385, lr_0 = 1.8226e-04
Loss = 1.5700e-03, PNorm = 171.4360, GNorm = 0.0560, lr_0 = 1.8213e-04
Loss = 1.4227e-03, PNorm = 171.4390, GNorm = 0.1341, lr_0 = 1.8201e-04
Loss = 1.4596e-03, PNorm = 171.4424, GNorm = 0.1250, lr_0 = 1.8188e-04
Loss = 1.7187e-03, PNorm = 171.4464, GNorm = 0.1038, lr_0 = 1.8176e-04
Loss = 4.9247e-03, PNorm = 171.4491, GNorm = 0.2201, lr_0 = 1.8163e-04
Loss = 2.5611e-03, PNorm = 171.4520, GNorm = 0.0678, lr_0 = 1.8151e-04
Loss = 2.2531e-03, PNorm = 171.4549, GNorm = 0.1629, lr_0 = 1.8138e-04
Loss = 2.6331e-03, PNorm = 171.4580, GNorm = 0.0585, lr_0 = 1.8126e-04
Loss = 2.6388e-03, PNorm = 171.4614, GNorm = 0.1845, lr_0 = 1.8114e-04
Loss = 1.6267e-03, PNorm = 171.4641, GNorm = 0.2283, lr_0 = 1.8101e-04
Loss = 3.8157e-03, PNorm = 171.4679, GNorm = 0.2165, lr_0 = 1.8089e-04
Loss = 1.5330e-03, PNorm = 171.4709, GNorm = 0.0485, lr_0 = 1.8076e-04
Loss = 2.9265e-03, PNorm = 171.4739, GNorm = 0.1564, lr_0 = 1.8064e-04
Loss = 2.1826e-03, PNorm = 171.4775, GNorm = 0.3727, lr_0 = 1.8052e-04
Loss = 2.1277e-03, PNorm = 171.4820, GNorm = 0.1252, lr_0 = 1.8039e-04
Loss = 2.0851e-03, PNorm = 171.4877, GNorm = 0.1936, lr_0 = 1.8027e-04
Loss = 2.7793e-03, PNorm = 171.4921, GNorm = 0.1221, lr_0 = 1.8015e-04
Loss = 2.3559e-03, PNorm = 171.4970, GNorm = 0.0674, lr_0 = 1.8002e-04
Loss = 2.0772e-03, PNorm = 171.5024, GNorm = 0.0573, lr_0 = 1.7990e-04
Loss = 3.9799e-03, PNorm = 171.5064, GNorm = 0.2916, lr_0 = 1.7978e-04
Loss = 1.5717e-03, PNorm = 171.5089, GNorm = 0.0944, lr_0 = 1.7965e-04
Loss = 3.5629e-03, PNorm = 171.5141, GNorm = 0.3816, lr_0 = 1.7953e-04
Loss = 3.3402e-03, PNorm = 171.5173, GNorm = 0.1764, lr_0 = 1.7941e-04
Loss = 2.1845e-03, PNorm = 171.5203, GNorm = 0.0492, lr_0 = 1.7928e-04
Loss = 1.8531e-03, PNorm = 171.5242, GNorm = 0.0864, lr_0 = 1.7916e-04
Loss = 1.7931e-03, PNorm = 171.5301, GNorm = 0.1055, lr_0 = 1.7904e-04
Loss = 2.0371e-03, PNorm = 171.5343, GNorm = 0.2829, lr_0 = 1.7892e-04
Loss = 1.9526e-03, PNorm = 171.5387, GNorm = 0.1905, lr_0 = 1.7879e-04
Loss = 2.0191e-03, PNorm = 171.5434, GNorm = 0.1948, lr_0 = 1.7867e-04
Loss = 2.1309e-03, PNorm = 171.5470, GNorm = 0.2283, lr_0 = 1.7855e-04
Loss = 1.7277e-03, PNorm = 171.5519, GNorm = 0.1397, lr_0 = 1.7843e-04
Loss = 1.5899e-03, PNorm = 171.5562, GNorm = 0.1419, lr_0 = 1.7830e-04
Loss = 4.5390e-03, PNorm = 171.5582, GNorm = 0.0427, lr_0 = 1.7818e-04
Loss = 4.2640e-03, PNorm = 171.5612, GNorm = 0.2014, lr_0 = 1.7806e-04
Loss = 3.3472e-03, PNorm = 171.5636, GNorm = 0.3140, lr_0 = 1.7794e-04
Loss = 4.8977e-03, PNorm = 171.5682, GNorm = 0.2028, lr_0 = 1.7782e-04
Validation mae = 0.277833
Epoch 23
Loss = 1.5873e-03, PNorm = 171.5701, GNorm = 0.1039, lr_0 = 1.7769e-04
Loss = 1.8595e-03, PNorm = 171.5725, GNorm = 0.1077, lr_0 = 1.7757e-04
Loss = 1.3670e-03, PNorm = 171.5751, GNorm = 0.1097, lr_0 = 1.7745e-04
Loss = 2.1179e-03, PNorm = 171.5802, GNorm = 0.1866, lr_0 = 1.7733e-04
Loss = 1.7982e-03, PNorm = 171.5849, GNorm = 0.0415, lr_0 = 1.7721e-04
Loss = 1.9255e-03, PNorm = 171.5869, GNorm = 0.1625, lr_0 = 1.7709e-04
Loss = 2.3045e-03, PNorm = 171.5897, GNorm = 0.2057, lr_0 = 1.7696e-04
Loss = 1.2407e-03, PNorm = 171.5911, GNorm = 0.1225, lr_0 = 1.7684e-04
Loss = 1.4280e-03, PNorm = 171.5941, GNorm = 0.0458, lr_0 = 1.7672e-04
Loss = 2.1887e-03, PNorm = 171.5969, GNorm = 0.1094, lr_0 = 1.7660e-04
Loss = 2.0520e-03, PNorm = 171.6003, GNorm = 0.1051, lr_0 = 1.7648e-04
Loss = 1.9302e-03, PNorm = 171.6047, GNorm = 0.1807, lr_0 = 1.7636e-04
Loss = 1.8909e-03, PNorm = 171.6064, GNorm = 0.1970, lr_0 = 1.7624e-04
Loss = 3.1145e-03, PNorm = 171.6067, GNorm = 0.1382, lr_0 = 1.7612e-04
Loss = 1.3698e-03, PNorm = 171.6081, GNorm = 0.1056, lr_0 = 1.7600e-04
Loss = 2.5091e-03, PNorm = 171.6114, GNorm = 0.0524, lr_0 = 1.7588e-04
Loss = 2.5524e-03, PNorm = 171.6136, GNorm = 0.2575, lr_0 = 1.7576e-04
Loss = 2.9555e-03, PNorm = 171.6154, GNorm = 0.0985, lr_0 = 1.7564e-04
Loss = 1.4158e-03, PNorm = 171.6166, GNorm = 0.1414, lr_0 = 1.7552e-04
Loss = 1.3647e-03, PNorm = 171.6173, GNorm = 0.2141, lr_0 = 1.7540e-04
Loss = 1.9023e-03, PNorm = 171.6202, GNorm = 0.0909, lr_0 = 1.7528e-04
Loss = 1.9322e-03, PNorm = 171.6234, GNorm = 0.2576, lr_0 = 1.7516e-04
Loss = 2.3609e-03, PNorm = 171.6259, GNorm = 0.1458, lr_0 = 1.7504e-04
Loss = 1.7178e-03, PNorm = 171.6283, GNorm = 0.1072, lr_0 = 1.7492e-04
Loss = 1.3095e-03, PNorm = 171.6311, GNorm = 0.1732, lr_0 = 1.7480e-04
Loss = 1.8748e-03, PNorm = 171.6347, GNorm = 0.0642, lr_0 = 1.7468e-04
Loss = 2.1686e-03, PNorm = 171.6396, GNorm = 0.1170, lr_0 = 1.7456e-04
Loss = 1.4418e-03, PNorm = 171.6431, GNorm = 0.2716, lr_0 = 1.7444e-04
Loss = 1.9719e-03, PNorm = 171.6454, GNorm = 0.1667, lr_0 = 1.7432e-04
Loss = 2.5680e-03, PNorm = 171.6491, GNorm = 0.1160, lr_0 = 1.7420e-04
Loss = 1.5136e-03, PNorm = 171.6532, GNorm = 0.1937, lr_0 = 1.7408e-04
Loss = 3.6687e-03, PNorm = 171.6547, GNorm = 0.0859, lr_0 = 1.7396e-04
Loss = 1.9890e-03, PNorm = 171.6583, GNorm = 0.1409, lr_0 = 1.7384e-04
Loss = 4.7614e-03, PNorm = 171.6611, GNorm = 0.0814, lr_0 = 1.7372e-04
Loss = 2.3984e-03, PNorm = 171.6649, GNorm = 0.1022, lr_0 = 1.7360e-04
Loss = 3.0314e-03, PNorm = 171.6675, GNorm = 0.1847, lr_0 = 1.7348e-04
Loss = 1.6060e-03, PNorm = 171.6700, GNorm = 0.0465, lr_0 = 1.7336e-04
Loss = 1.4565e-03, PNorm = 171.6715, GNorm = 0.1522, lr_0 = 1.7325e-04
Loss = 1.5549e-03, PNorm = 171.6736, GNorm = 0.1252, lr_0 = 1.7313e-04
Loss = 1.3761e-03, PNorm = 171.6747, GNorm = 0.0925, lr_0 = 1.7301e-04
Loss = 1.3013e-03, PNorm = 171.6776, GNorm = 0.0596, lr_0 = 1.7289e-04
Loss = 1.7918e-03, PNorm = 171.6787, GNorm = 0.2004, lr_0 = 1.7277e-04
Loss = 1.5010e-03, PNorm = 171.6818, GNorm = 0.1842, lr_0 = 1.7265e-04
Loss = 1.7267e-03, PNorm = 171.6845, GNorm = 0.0429, lr_0 = 1.7253e-04
Loss = 1.0318e-03, PNorm = 171.6878, GNorm = 0.2269, lr_0 = 1.7242e-04
Loss = 1.6098e-03, PNorm = 171.6921, GNorm = 0.0945, lr_0 = 1.7230e-04
Loss = 2.2405e-03, PNorm = 171.6947, GNorm = 0.0722, lr_0 = 1.7218e-04
Loss = 1.3401e-03, PNorm = 171.6970, GNorm = 0.0883, lr_0 = 1.7206e-04
Loss = 1.3767e-03, PNorm = 171.7012, GNorm = 0.1987, lr_0 = 1.7194e-04
Loss = 1.6333e-03, PNorm = 171.7038, GNorm = 0.0700, lr_0 = 1.7183e-04
Loss = 1.5383e-03, PNorm = 171.7064, GNorm = 0.0901, lr_0 = 1.7171e-04
Loss = 1.4630e-03, PNorm = 171.7102, GNorm = 0.2334, lr_0 = 1.7159e-04
Loss = 1.8543e-03, PNorm = 171.7127, GNorm = 0.1570, lr_0 = 1.7147e-04
Loss = 1.9566e-03, PNorm = 171.7148, GNorm = 0.1092, lr_0 = 1.7136e-04
Loss = 2.0923e-03, PNorm = 171.7176, GNorm = 0.2503, lr_0 = 1.7124e-04
Loss = 2.3962e-03, PNorm = 171.7208, GNorm = 0.1281, lr_0 = 1.7112e-04
Loss = 3.8146e-03, PNorm = 171.7255, GNorm = 0.0787, lr_0 = 1.7100e-04
Loss = 2.8559e-03, PNorm = 171.7280, GNorm = 0.1389, lr_0 = 1.7089e-04
Loss = 2.7041e-03, PNorm = 171.7302, GNorm = 0.0779, lr_0 = 1.7077e-04
Loss = 1.4576e-03, PNorm = 171.7353, GNorm = 0.1450, lr_0 = 1.7065e-04
Loss = 3.0678e-03, PNorm = 171.7390, GNorm = 0.7771, lr_0 = 1.7054e-04
Loss = 3.9082e-03, PNorm = 171.7410, GNorm = 0.4879, lr_0 = 1.7042e-04
Loss = 1.6021e-03, PNorm = 171.7442, GNorm = 0.0697, lr_0 = 1.7030e-04
Loss = 2.5647e-03, PNorm = 171.7459, GNorm = 0.1737, lr_0 = 1.7019e-04
Loss = 1.8488e-03, PNorm = 171.7482, GNorm = 0.2442, lr_0 = 1.7007e-04
Loss = 2.3951e-03, PNorm = 171.7502, GNorm = 0.0962, lr_0 = 1.6995e-04
Loss = 2.3243e-03, PNorm = 171.7542, GNorm = 0.1028, lr_0 = 1.6984e-04
Loss = 1.6144e-03, PNorm = 171.7577, GNorm = 0.1361, lr_0 = 1.6972e-04
Loss = 2.9755e-03, PNorm = 171.7638, GNorm = 0.0712, lr_0 = 1.6960e-04
Loss = 1.6638e-03, PNorm = 171.7695, GNorm = 0.0580, lr_0 = 1.6949e-04
Loss = 1.2604e-03, PNorm = 171.7751, GNorm = 0.0848, lr_0 = 1.6937e-04
Loss = 3.7297e-03, PNorm = 171.7798, GNorm = 0.4046, lr_0 = 1.6926e-04
Loss = 1.4628e-03, PNorm = 171.7843, GNorm = 0.0349, lr_0 = 1.6914e-04
Loss = 1.9575e-03, PNorm = 171.7882, GNorm = 0.2610, lr_0 = 1.6902e-04
Loss = 2.8639e-03, PNorm = 171.7899, GNorm = 0.1913, lr_0 = 1.6891e-04
Loss = 1.2035e-03, PNorm = 171.7928, GNorm = 0.0870, lr_0 = 1.6879e-04
Loss = 1.8127e-03, PNorm = 171.7965, GNorm = 0.0553, lr_0 = 1.6868e-04
Loss = 1.4881e-03, PNorm = 171.7992, GNorm = 0.1464, lr_0 = 1.6856e-04
Loss = 1.2468e-03, PNorm = 171.8017, GNorm = 0.1196, lr_0 = 1.6845e-04
Loss = 2.0824e-03, PNorm = 171.8034, GNorm = 0.1190, lr_0 = 1.6833e-04
Loss = 2.0448e-03, PNorm = 171.8058, GNorm = 0.1286, lr_0 = 1.6821e-04
Loss = 1.1397e-03, PNorm = 171.8072, GNorm = 0.0252, lr_0 = 1.6810e-04
Loss = 3.5669e-03, PNorm = 171.8103, GNorm = 0.2546, lr_0 = 1.6798e-04
Loss = 2.4355e-03, PNorm = 171.8153, GNorm = 0.0340, lr_0 = 1.6787e-04
Loss = 3.9465e-03, PNorm = 171.8182, GNorm = 0.3245, lr_0 = 1.6775e-04
Loss = 1.2903e-03, PNorm = 171.8209, GNorm = 0.0858, lr_0 = 1.6764e-04
Loss = 2.8170e-03, PNorm = 171.8225, GNorm = 0.2106, lr_0 = 1.6752e-04
Loss = 1.3937e-03, PNorm = 171.8260, GNorm = 0.1256, lr_0 = 1.6741e-04
Loss = 1.6833e-03, PNorm = 171.8301, GNorm = 0.1418, lr_0 = 1.6729e-04
Loss = 1.4835e-03, PNorm = 171.8330, GNorm = 0.1906, lr_0 = 1.6718e-04
Loss = 1.3627e-03, PNorm = 171.8366, GNorm = 0.0538, lr_0 = 1.6707e-04
Loss = 2.0691e-03, PNorm = 171.8399, GNorm = 0.0644, lr_0 = 1.6695e-04
Loss = 2.1430e-03, PNorm = 171.8417, GNorm = 0.1803, lr_0 = 1.6684e-04
Loss = 1.7068e-03, PNorm = 171.8449, GNorm = 0.1289, lr_0 = 1.6672e-04
Loss = 1.1425e-03, PNorm = 171.8483, GNorm = 0.1583, lr_0 = 1.6661e-04
Loss = 2.4414e-03, PNorm = 171.8498, GNorm = 0.1052, lr_0 = 1.6649e-04
Loss = 3.9025e-03, PNorm = 171.8537, GNorm = 0.6523, lr_0 = 1.6638e-04
Loss = 1.3894e-03, PNorm = 171.8559, GNorm = 0.1713, lr_0 = 1.6627e-04
Loss = 1.4002e-03, PNorm = 171.8608, GNorm = 0.0838, lr_0 = 1.6615e-04
Loss = 1.5750e-03, PNorm = 171.8644, GNorm = 0.1880, lr_0 = 1.6604e-04
Loss = 1.7618e-03, PNorm = 171.8677, GNorm = 0.0770, lr_0 = 1.6592e-04
Loss = 4.8623e-03, PNorm = 171.8698, GNorm = 0.1142, lr_0 = 1.6581e-04
Loss = 1.5065e-03, PNorm = 171.8714, GNorm = 0.1005, lr_0 = 1.6570e-04
Loss = 2.9867e-03, PNorm = 171.8741, GNorm = 0.1218, lr_0 = 1.6558e-04
Loss = 2.1202e-03, PNorm = 171.8758, GNorm = 0.0541, lr_0 = 1.6547e-04
Loss = 2.8687e-03, PNorm = 171.8792, GNorm = 0.1286, lr_0 = 1.6536e-04
Loss = 1.4291e-03, PNorm = 171.8834, GNorm = 0.1050, lr_0 = 1.6524e-04
Loss = 1.5489e-03, PNorm = 171.8874, GNorm = 0.1017, lr_0 = 1.6513e-04
Loss = 2.3753e-03, PNorm = 171.8912, GNorm = 0.2717, lr_0 = 1.6502e-04
Loss = 2.5432e-03, PNorm = 171.8951, GNorm = 0.0670, lr_0 = 1.6490e-04
Loss = 1.2301e-03, PNorm = 171.8987, GNorm = 0.1130, lr_0 = 1.6479e-04
Loss = 2.4341e-03, PNorm = 171.9012, GNorm = 0.0686, lr_0 = 1.6468e-04
Loss = 1.3962e-03, PNorm = 171.9039, GNorm = 0.1030, lr_0 = 1.6457e-04
Loss = 1.7616e-03, PNorm = 171.9078, GNorm = 0.1536, lr_0 = 1.6445e-04
Loss = 2.5295e-03, PNorm = 171.9095, GNorm = 0.2034, lr_0 = 1.6434e-04
Loss = 4.7809e-03, PNorm = 171.9137, GNorm = 0.1800, lr_0 = 1.6423e-04
Loss = 1.8241e-03, PNorm = 171.9176, GNorm = 0.1367, lr_0 = 1.6412e-04
Loss = 2.9130e-03, PNorm = 171.9218, GNorm = 0.1985, lr_0 = 1.6400e-04
Loss = 2.6974e-03, PNorm = 171.9246, GNorm = 0.0445, lr_0 = 1.6389e-04
Loss = 1.4301e-03, PNorm = 171.9279, GNorm = 0.0361, lr_0 = 1.6378e-04
Validation mae = 0.277852
Epoch 24
Loss = 1.5701e-03, PNorm = 171.9302, GNorm = 0.0899, lr_0 = 1.6367e-04
Loss = 2.2078e-03, PNorm = 171.9309, GNorm = 0.1840, lr_0 = 1.6355e-04
Loss = 1.2629e-03, PNorm = 171.9330, GNorm = 0.1504, lr_0 = 1.6344e-04
Loss = 2.0009e-03, PNorm = 171.9347, GNorm = 0.1166, lr_0 = 1.6333e-04
Loss = 2.3035e-03, PNorm = 171.9357, GNorm = 0.2760, lr_0 = 1.6322e-04
Loss = 1.5065e-03, PNorm = 171.9358, GNorm = 0.1427, lr_0 = 1.6311e-04
Loss = 3.4807e-03, PNorm = 171.9365, GNorm = 0.0830, lr_0 = 1.6299e-04
Loss = 1.9022e-03, PNorm = 171.9394, GNorm = 0.1013, lr_0 = 1.6288e-04
Loss = 1.2101e-03, PNorm = 171.9415, GNorm = 0.0759, lr_0 = 1.6277e-04
Loss = 1.4197e-03, PNorm = 171.9449, GNorm = 0.1190, lr_0 = 1.6266e-04
Loss = 1.3918e-03, PNorm = 171.9463, GNorm = 0.1223, lr_0 = 1.6255e-04
Loss = 9.8158e-04, PNorm = 171.9485, GNorm = 0.0963, lr_0 = 1.6244e-04
Loss = 1.3577e-03, PNorm = 171.9519, GNorm = 0.0376, lr_0 = 1.6233e-04
Loss = 1.2654e-03, PNorm = 171.9543, GNorm = 0.0626, lr_0 = 1.6221e-04
Loss = 1.1835e-03, PNorm = 171.9576, GNorm = 0.0738, lr_0 = 1.6210e-04
Loss = 1.6684e-03, PNorm = 171.9595, GNorm = 0.3033, lr_0 = 1.6199e-04
Loss = 2.0354e-03, PNorm = 171.9611, GNorm = 0.0531, lr_0 = 1.6188e-04
Loss = 1.4259e-03, PNorm = 171.9643, GNorm = 0.1116, lr_0 = 1.6177e-04
Loss = 1.0411e-03, PNorm = 171.9667, GNorm = 0.2398, lr_0 = 1.6166e-04
Loss = 2.9605e-03, PNorm = 171.9690, GNorm = 0.3922, lr_0 = 1.6155e-04
Loss = 2.1747e-03, PNorm = 171.9695, GNorm = 0.1299, lr_0 = 1.6144e-04
Loss = 1.5423e-03, PNorm = 171.9713, GNorm = 0.1343, lr_0 = 1.6133e-04
Loss = 1.4263e-03, PNorm = 171.9741, GNorm = 0.0951, lr_0 = 1.6122e-04
Loss = 1.0478e-03, PNorm = 171.9762, GNorm = 0.1585, lr_0 = 1.6111e-04
Loss = 1.5211e-03, PNorm = 171.9770, GNorm = 0.0296, lr_0 = 1.6100e-04
Loss = 2.4406e-03, PNorm = 171.9786, GNorm = 0.0879, lr_0 = 1.6089e-04
Loss = 2.8709e-03, PNorm = 171.9805, GNorm = 0.2880, lr_0 = 1.6078e-04
Loss = 3.0192e-03, PNorm = 171.9840, GNorm = 0.1008, lr_0 = 1.6067e-04
Loss = 1.3353e-03, PNorm = 171.9869, GNorm = 0.0850, lr_0 = 1.6056e-04
Loss = 2.4186e-03, PNorm = 171.9892, GNorm = 0.4607, lr_0 = 1.6045e-04
Loss = 1.2189e-03, PNorm = 171.9924, GNorm = 0.1060, lr_0 = 1.6034e-04
Loss = 2.5818e-03, PNorm = 171.9954, GNorm = 0.1103, lr_0 = 1.6023e-04
Loss = 1.6399e-03, PNorm = 171.9964, GNorm = 0.1948, lr_0 = 1.6012e-04
Loss = 1.8702e-03, PNorm = 172.0006, GNorm = 0.1376, lr_0 = 1.6001e-04
Loss = 2.6637e-03, PNorm = 172.0021, GNorm = 0.1869, lr_0 = 1.5990e-04
Loss = 2.4338e-03, PNorm = 172.0063, GNorm = 0.0717, lr_0 = 1.5979e-04
Loss = 1.4550e-03, PNorm = 172.0107, GNorm = 0.1828, lr_0 = 1.5968e-04
Loss = 1.7054e-03, PNorm = 172.0151, GNorm = 0.2552, lr_0 = 1.5957e-04
Loss = 2.9928e-03, PNorm = 172.0187, GNorm = 0.2348, lr_0 = 1.5946e-04
Loss = 1.6715e-03, PNorm = 172.0205, GNorm = 0.1995, lr_0 = 1.5935e-04
Loss = 2.3014e-03, PNorm = 172.0226, GNorm = 0.2148, lr_0 = 1.5924e-04
Loss = 1.4996e-03, PNorm = 172.0259, GNorm = 0.0611, lr_0 = 1.5913e-04
Loss = 2.3014e-03, PNorm = 172.0294, GNorm = 0.0882, lr_0 = 1.5902e-04
Loss = 3.1260e-03, PNorm = 172.0310, GNorm = 0.1114, lr_0 = 1.5891e-04
Loss = 2.6344e-03, PNorm = 172.0354, GNorm = 0.1456, lr_0 = 1.5880e-04
Loss = 1.3986e-03, PNorm = 172.0395, GNorm = 0.1015, lr_0 = 1.5870e-04
Loss = 1.5900e-03, PNorm = 172.0443, GNorm = 0.2205, lr_0 = 1.5859e-04
Loss = 1.1913e-03, PNorm = 172.0468, GNorm = 0.1454, lr_0 = 1.5848e-04
Loss = 2.0143e-03, PNorm = 172.0496, GNorm = 0.0582, lr_0 = 1.5837e-04
Loss = 1.7127e-03, PNorm = 172.0498, GNorm = 0.1069, lr_0 = 1.5826e-04
Loss = 2.1705e-03, PNorm = 172.0515, GNorm = 0.2665, lr_0 = 1.5815e-04
Loss = 1.5780e-03, PNorm = 172.0549, GNorm = 0.3140, lr_0 = 1.5804e-04
Loss = 1.2679e-03, PNorm = 172.0581, GNorm = 0.0819, lr_0 = 1.5794e-04
Loss = 1.0374e-03, PNorm = 172.0596, GNorm = 0.1978, lr_0 = 1.5783e-04
Loss = 2.6178e-03, PNorm = 172.0613, GNorm = 0.0613, lr_0 = 1.5772e-04
Loss = 1.1742e-03, PNorm = 172.0644, GNorm = 0.2097, lr_0 = 1.5761e-04
Loss = 1.5088e-03, PNorm = 172.0677, GNorm = 0.1948, lr_0 = 1.5750e-04
Loss = 2.3928e-03, PNorm = 172.0701, GNorm = 0.1083, lr_0 = 1.5740e-04
Loss = 2.3243e-03, PNorm = 172.0716, GNorm = 0.2004, lr_0 = 1.5729e-04
Loss = 1.3484e-03, PNorm = 172.0746, GNorm = 0.0689, lr_0 = 1.5718e-04
Loss = 2.7091e-03, PNorm = 172.0769, GNorm = 0.1382, lr_0 = 1.5707e-04
Loss = 1.7830e-03, PNorm = 172.0792, GNorm = 0.0902, lr_0 = 1.5697e-04
Loss = 1.1396e-03, PNorm = 172.0800, GNorm = 0.0518, lr_0 = 1.5686e-04
Loss = 1.2039e-03, PNorm = 172.0822, GNorm = 0.0937, lr_0 = 1.5675e-04
Loss = 1.4425e-03, PNorm = 172.0837, GNorm = 0.1500, lr_0 = 1.5664e-04
Loss = 1.7765e-03, PNorm = 172.0856, GNorm = 0.2606, lr_0 = 1.5654e-04
Loss = 1.1722e-03, PNorm = 172.0888, GNorm = 0.1156, lr_0 = 1.5643e-04
Loss = 1.6442e-03, PNorm = 172.0911, GNorm = 0.1127, lr_0 = 1.5632e-04
Loss = 2.0295e-03, PNorm = 172.0940, GNorm = 0.1102, lr_0 = 1.5621e-04
Loss = 1.9753e-03, PNorm = 172.0967, GNorm = 0.1095, lr_0 = 1.5611e-04
Loss = 1.2594e-03, PNorm = 172.1006, GNorm = 0.0691, lr_0 = 1.5600e-04
Loss = 1.5157e-03, PNorm = 172.1037, GNorm = 0.1450, lr_0 = 1.5589e-04
Loss = 1.7318e-03, PNorm = 172.1068, GNorm = 0.1607, lr_0 = 1.5579e-04
Loss = 1.8906e-03, PNorm = 172.1097, GNorm = 0.1882, lr_0 = 1.5568e-04
Loss = 1.3053e-03, PNorm = 172.1123, GNorm = 0.0703, lr_0 = 1.5557e-04
Loss = 2.0317e-03, PNorm = 172.1123, GNorm = 0.4075, lr_0 = 1.5547e-04
Loss = 1.6919e-03, PNorm = 172.1152, GNorm = 0.0696, lr_0 = 1.5536e-04
Loss = 2.4711e-03, PNorm = 172.1195, GNorm = 0.2265, lr_0 = 1.5525e-04
Loss = 1.1643e-03, PNorm = 172.1245, GNorm = 0.0751, lr_0 = 1.5515e-04
Loss = 4.0825e-03, PNorm = 172.1266, GNorm = 0.2398, lr_0 = 1.5504e-04
Loss = 1.3317e-03, PNorm = 172.1279, GNorm = 0.1876, lr_0 = 1.5493e-04
Loss = 2.8108e-03, PNorm = 172.1301, GNorm = 0.1836, lr_0 = 1.5483e-04
Loss = 3.0741e-03, PNorm = 172.1340, GNorm = 0.2271, lr_0 = 1.5472e-04
Loss = 2.2125e-03, PNorm = 172.1361, GNorm = 0.3031, lr_0 = 1.5462e-04
Loss = 2.2068e-03, PNorm = 172.1398, GNorm = 0.1606, lr_0 = 1.5451e-04
Loss = 3.1124e-03, PNorm = 172.1408, GNorm = 0.0883, lr_0 = 1.5440e-04
Loss = 1.5473e-03, PNorm = 172.1422, GNorm = 0.1063, lr_0 = 1.5430e-04
Loss = 2.3931e-03, PNorm = 172.1447, GNorm = 0.1521, lr_0 = 1.5419e-04
Loss = 1.8034e-03, PNorm = 172.1485, GNorm = 0.1550, lr_0 = 1.5409e-04
Loss = 1.1572e-03, PNorm = 172.1520, GNorm = 0.0515, lr_0 = 1.5398e-04
Loss = 2.5492e-03, PNorm = 172.1566, GNorm = 0.3026, lr_0 = 1.5388e-04
Loss = 6.2848e-03, PNorm = 172.1623, GNorm = 0.0712, lr_0 = 1.5377e-04
Loss = 1.1158e-03, PNorm = 172.1657, GNorm = 0.1306, lr_0 = 1.5367e-04
Loss = 1.6255e-03, PNorm = 172.1707, GNorm = 0.1750, lr_0 = 1.5356e-04
Loss = 1.5965e-03, PNorm = 172.1736, GNorm = 0.1469, lr_0 = 1.5346e-04
Loss = 1.4058e-03, PNorm = 172.1765, GNorm = 0.0631, lr_0 = 1.5335e-04
Loss = 1.6236e-03, PNorm = 172.1778, GNorm = 0.0647, lr_0 = 1.5325e-04
Loss = 1.7994e-03, PNorm = 172.1805, GNorm = 0.1299, lr_0 = 1.5314e-04
Loss = 1.3098e-03, PNorm = 172.1820, GNorm = 0.1162, lr_0 = 1.5304e-04
Loss = 1.1752e-03, PNorm = 172.1840, GNorm = 0.1510, lr_0 = 1.5293e-04
Loss = 1.6100e-03, PNorm = 172.1867, GNorm = 0.1020, lr_0 = 1.5283e-04
Loss = 9.8309e-04, PNorm = 172.1900, GNorm = 0.0897, lr_0 = 1.5272e-04
Loss = 1.0525e-03, PNorm = 172.1916, GNorm = 0.1344, lr_0 = 1.5262e-04
Loss = 5.6708e-03, PNorm = 172.1935, GNorm = 0.1620, lr_0 = 1.5251e-04
Loss = 3.8174e-03, PNorm = 172.1964, GNorm = 0.5066, lr_0 = 1.5241e-04
Loss = 2.0540e-03, PNorm = 172.1995, GNorm = 0.1351, lr_0 = 1.5230e-04
Loss = 3.2249e-03, PNorm = 172.2018, GNorm = 0.2434, lr_0 = 1.5220e-04
Loss = 2.0502e-03, PNorm = 172.2053, GNorm = 0.0774, lr_0 = 1.5209e-04
Loss = 1.0791e-03, PNorm = 172.2096, GNorm = 0.1317, lr_0 = 1.5199e-04
Loss = 1.2075e-03, PNorm = 172.2127, GNorm = 0.0895, lr_0 = 1.5189e-04
Loss = 1.1274e-03, PNorm = 172.2152, GNorm = 0.1202, lr_0 = 1.5178e-04
Loss = 1.4172e-03, PNorm = 172.2179, GNorm = 0.0896, lr_0 = 1.5168e-04
Loss = 1.1002e-03, PNorm = 172.2207, GNorm = 0.0824, lr_0 = 1.5157e-04
Loss = 9.0705e-04, PNorm = 172.2244, GNorm = 0.0976, lr_0 = 1.5147e-04
Loss = 2.1752e-03, PNorm = 172.2270, GNorm = 0.0375, lr_0 = 1.5137e-04
Loss = 1.2908e-03, PNorm = 172.2281, GNorm = 0.0560, lr_0 = 1.5126e-04
Loss = 1.0390e-03, PNorm = 172.2312, GNorm = 0.1881, lr_0 = 1.5116e-04
Loss = 2.9928e-03, PNorm = 172.2343, GNorm = 0.0531, lr_0 = 1.5106e-04
Loss = 2.2221e-03, PNorm = 172.2358, GNorm = 0.0327, lr_0 = 1.5095e-04
Loss = 1.2302e-03, PNorm = 172.2374, GNorm = 0.1316, lr_0 = 1.5085e-04
Validation mae = 0.277996
Epoch 25
Loss = 1.4243e-03, PNorm = 172.2390, GNorm = 0.0939, lr_0 = 1.5075e-04
Loss = 1.8452e-03, PNorm = 172.2396, GNorm = 0.1045, lr_0 = 1.5064e-04
Loss = 9.9223e-04, PNorm = 172.2402, GNorm = 0.0698, lr_0 = 1.5054e-04
Loss = 1.0596e-03, PNorm = 172.2418, GNorm = 0.1744, lr_0 = 1.5044e-04
Loss = 1.7001e-03, PNorm = 172.2438, GNorm = 0.0751, lr_0 = 1.5033e-04
Loss = 9.1185e-04, PNorm = 172.2461, GNorm = 0.1224, lr_0 = 1.5023e-04
Loss = 2.0147e-03, PNorm = 172.2472, GNorm = 0.2433, lr_0 = 1.5013e-04
Loss = 1.5839e-03, PNorm = 172.2486, GNorm = 0.2359, lr_0 = 1.5002e-04
Loss = 1.2773e-03, PNorm = 172.2498, GNorm = 0.1566, lr_0 = 1.4992e-04
Loss = 2.1111e-03, PNorm = 172.2529, GNorm = 0.4167, lr_0 = 1.4982e-04
Loss = 1.8865e-03, PNorm = 172.2552, GNorm = 0.1079, lr_0 = 1.4972e-04
Loss = 1.7301e-03, PNorm = 172.2586, GNorm = 0.1285, lr_0 = 1.4961e-04
Loss = 1.5126e-03, PNorm = 172.2606, GNorm = 0.1239, lr_0 = 1.4951e-04
Loss = 1.1509e-03, PNorm = 172.2632, GNorm = 0.0679, lr_0 = 1.4941e-04
Loss = 2.7434e-03, PNorm = 172.2649, GNorm = 0.1218, lr_0 = 1.4931e-04
Loss = 1.2044e-03, PNorm = 172.2686, GNorm = 0.0611, lr_0 = 1.4920e-04
Loss = 2.1991e-03, PNorm = 172.2708, GNorm = 0.1660, lr_0 = 1.4910e-04
Loss = 1.5030e-03, PNorm = 172.2710, GNorm = 0.0975, lr_0 = 1.4900e-04
Loss = 3.2615e-03, PNorm = 172.2722, GNorm = 0.1124, lr_0 = 1.4890e-04
Loss = 1.3322e-03, PNorm = 172.2740, GNorm = 0.1136, lr_0 = 1.4880e-04
Loss = 1.7137e-03, PNorm = 172.2771, GNorm = 0.1991, lr_0 = 1.4869e-04
Loss = 1.5699e-03, PNorm = 172.2789, GNorm = 0.1529, lr_0 = 1.4859e-04
Loss = 1.8680e-03, PNorm = 172.2813, GNorm = 0.1545, lr_0 = 1.4849e-04
Loss = 1.3345e-03, PNorm = 172.2828, GNorm = 0.0948, lr_0 = 1.4839e-04
Loss = 1.6558e-03, PNorm = 172.2839, GNorm = 0.1863, lr_0 = 1.4829e-04
Loss = 2.6582e-03, PNorm = 172.2855, GNorm = 0.0895, lr_0 = 1.4818e-04
Loss = 1.0984e-03, PNorm = 172.2883, GNorm = 0.0414, lr_0 = 1.4808e-04
Loss = 1.9259e-03, PNorm = 172.2904, GNorm = 0.1228, lr_0 = 1.4798e-04
Loss = 1.2525e-03, PNorm = 172.2930, GNorm = 0.0530, lr_0 = 1.4788e-04
Loss = 1.6648e-03, PNorm = 172.2947, GNorm = 0.1453, lr_0 = 1.4778e-04
Loss = 1.5878e-03, PNorm = 172.2970, GNorm = 0.0895, lr_0 = 1.4768e-04
Loss = 1.4000e-03, PNorm = 172.2981, GNorm = 0.0408, lr_0 = 1.4758e-04
Loss = 1.5865e-03, PNorm = 172.3023, GNorm = 0.2409, lr_0 = 1.4748e-04
Loss = 1.6493e-03, PNorm = 172.3052, GNorm = 0.0538, lr_0 = 1.4737e-04
Loss = 1.0200e-03, PNorm = 172.3087, GNorm = 0.0944, lr_0 = 1.4727e-04
Loss = 1.4478e-03, PNorm = 172.3098, GNorm = 0.2059, lr_0 = 1.4717e-04
Loss = 2.3703e-03, PNorm = 172.3124, GNorm = 0.0495, lr_0 = 1.4707e-04
Loss = 1.0362e-03, PNorm = 172.3175, GNorm = 0.0418, lr_0 = 1.4697e-04
Loss = 8.7496e-04, PNorm = 172.3217, GNorm = 0.1056, lr_0 = 1.4687e-04
Loss = 3.4514e-03, PNorm = 172.3237, GNorm = 0.1853, lr_0 = 1.4677e-04
Loss = 1.5936e-03, PNorm = 172.3249, GNorm = 0.1568, lr_0 = 1.4667e-04
Loss = 8.6047e-04, PNorm = 172.3268, GNorm = 0.1377, lr_0 = 1.4657e-04
Loss = 2.5472e-03, PNorm = 172.3279, GNorm = 0.0775, lr_0 = 1.4647e-04
Loss = 1.4085e-03, PNorm = 172.3299, GNorm = 0.1136, lr_0 = 1.4637e-04
Loss = 9.2518e-04, PNorm = 172.3323, GNorm = 0.1473, lr_0 = 1.4627e-04
Loss = 1.6085e-03, PNorm = 172.3349, GNorm = 0.1015, lr_0 = 1.4617e-04
Loss = 9.5379e-04, PNorm = 172.3359, GNorm = 0.0629, lr_0 = 1.4607e-04
Loss = 3.2487e-03, PNorm = 172.3369, GNorm = 0.2744, lr_0 = 1.4597e-04
Loss = 1.0433e-03, PNorm = 172.3394, GNorm = 0.0732, lr_0 = 1.4587e-04
Loss = 1.8403e-03, PNorm = 172.3415, GNorm = 0.0946, lr_0 = 1.4577e-04
Loss = 2.2148e-03, PNorm = 172.3444, GNorm = 0.1275, lr_0 = 1.4567e-04
Loss = 1.6275e-03, PNorm = 172.3464, GNorm = 0.1403, lr_0 = 1.4557e-04
Loss = 2.0193e-03, PNorm = 172.3495, GNorm = 0.1951, lr_0 = 1.4547e-04
Loss = 1.1811e-03, PNorm = 172.3518, GNorm = 0.1188, lr_0 = 1.4537e-04
Loss = 9.7197e-04, PNorm = 172.3553, GNorm = 0.0606, lr_0 = 1.4527e-04
Loss = 1.9362e-03, PNorm = 172.3582, GNorm = 0.0491, lr_0 = 1.4517e-04
Loss = 1.2359e-03, PNorm = 172.3607, GNorm = 0.0478, lr_0 = 1.4507e-04
Loss = 1.6802e-03, PNorm = 172.3632, GNorm = 0.2753, lr_0 = 1.4497e-04
Loss = 2.3404e-03, PNorm = 172.3639, GNorm = 0.2499, lr_0 = 1.4487e-04
Loss = 1.5931e-03, PNorm = 172.3653, GNorm = 0.1460, lr_0 = 1.4477e-04
Loss = 1.1964e-03, PNorm = 172.3681, GNorm = 0.0486, lr_0 = 1.4467e-04
Loss = 2.2116e-03, PNorm = 172.3701, GNorm = 0.0607, lr_0 = 1.4457e-04
Loss = 2.4808e-03, PNorm = 172.3723, GNorm = 0.1152, lr_0 = 1.4447e-04
Loss = 2.0336e-03, PNorm = 172.3737, GNorm = 0.3076, lr_0 = 1.4438e-04
Loss = 9.6716e-04, PNorm = 172.3756, GNorm = 0.1585, lr_0 = 1.4428e-04
Loss = 2.1009e-03, PNorm = 172.3778, GNorm = 0.0580, lr_0 = 1.4418e-04
Loss = 9.1986e-04, PNorm = 172.3804, GNorm = 0.0530, lr_0 = 1.4408e-04
Loss = 1.0121e-03, PNorm = 172.3827, GNorm = 0.1373, lr_0 = 1.4398e-04
Loss = 1.4700e-03, PNorm = 172.3853, GNorm = 0.1296, lr_0 = 1.4388e-04
Loss = 1.0654e-03, PNorm = 172.3874, GNorm = 0.1128, lr_0 = 1.4378e-04
Loss = 2.3093e-03, PNorm = 172.3888, GNorm = 0.0785, lr_0 = 1.4368e-04
Loss = 1.0289e-03, PNorm = 172.3910, GNorm = 0.0659, lr_0 = 1.4359e-04
Loss = 1.0185e-03, PNorm = 172.3932, GNorm = 0.1357, lr_0 = 1.4349e-04
Loss = 1.5510e-03, PNorm = 172.3951, GNorm = 0.1287, lr_0 = 1.4339e-04
Loss = 3.3755e-03, PNorm = 172.3968, GNorm = 0.4509, lr_0 = 1.4329e-04
Loss = 1.0982e-03, PNorm = 172.3983, GNorm = 0.1457, lr_0 = 1.4319e-04
Loss = 1.0088e-03, PNorm = 172.4004, GNorm = 0.1721, lr_0 = 1.4310e-04
Loss = 2.2905e-03, PNorm = 172.4036, GNorm = 0.0296, lr_0 = 1.4300e-04
Loss = 1.0345e-03, PNorm = 172.4047, GNorm = 0.0983, lr_0 = 1.4290e-04
Loss = 2.2251e-03, PNorm = 172.4071, GNorm = 0.2325, lr_0 = 1.4280e-04
Loss = 2.5098e-03, PNorm = 172.4086, GNorm = 0.7317, lr_0 = 1.4270e-04
Loss = 2.4366e-03, PNorm = 172.4112, GNorm = 0.1253, lr_0 = 1.4261e-04
Loss = 5.0467e-03, PNorm = 172.4121, GNorm = 0.1614, lr_0 = 1.4251e-04
Loss = 1.8794e-03, PNorm = 172.4140, GNorm = 0.2370, lr_0 = 1.4241e-04
Loss = 2.0374e-03, PNorm = 172.4158, GNorm = 0.0501, lr_0 = 1.4231e-04
Loss = 1.2477e-03, PNorm = 172.4196, GNorm = 0.0971, lr_0 = 1.4222e-04
Loss = 1.4210e-03, PNorm = 172.4223, GNorm = 0.0795, lr_0 = 1.4212e-04
Loss = 9.3504e-04, PNorm = 172.4258, GNorm = 0.0676, lr_0 = 1.4202e-04
Loss = 9.9865e-04, PNorm = 172.4285, GNorm = 0.0883, lr_0 = 1.4192e-04
Loss = 2.7808e-03, PNorm = 172.4310, GNorm = 0.1234, lr_0 = 1.4183e-04
Loss = 1.1556e-03, PNorm = 172.4323, GNorm = 0.1522, lr_0 = 1.4173e-04
Loss = 2.7401e-03, PNorm = 172.4346, GNorm = 0.2074, lr_0 = 1.4163e-04
Loss = 1.9467e-03, PNorm = 172.4377, GNorm = 0.1883, lr_0 = 1.4153e-04
Loss = 1.9437e-03, PNorm = 172.4390, GNorm = 0.1202, lr_0 = 1.4144e-04
Loss = 1.7817e-03, PNorm = 172.4421, GNorm = 0.1310, lr_0 = 1.4134e-04
Loss = 2.5575e-03, PNorm = 172.4440, GNorm = 0.0313, lr_0 = 1.4124e-04
Loss = 1.1946e-03, PNorm = 172.4469, GNorm = 0.0940, lr_0 = 1.4115e-04
Loss = 4.1277e-03, PNorm = 172.4467, GNorm = 0.1266, lr_0 = 1.4105e-04
Loss = 2.6800e-03, PNorm = 172.4500, GNorm = 0.3734, lr_0 = 1.4095e-04
Loss = 2.1056e-03, PNorm = 172.4522, GNorm = 0.0290, lr_0 = 1.4086e-04
Loss = 1.2921e-03, PNorm = 172.4557, GNorm = 0.0901, lr_0 = 1.4076e-04
Loss = 2.6704e-03, PNorm = 172.4608, GNorm = 0.2513, lr_0 = 1.4066e-04
Loss = 1.0032e-03, PNorm = 172.4644, GNorm = 0.1426, lr_0 = 1.4057e-04
Loss = 1.3436e-03, PNorm = 172.4666, GNorm = 0.0484, lr_0 = 1.4047e-04
Loss = 1.7915e-03, PNorm = 172.4677, GNorm = 0.0749, lr_0 = 1.4038e-04
Loss = 1.0955e-03, PNorm = 172.4690, GNorm = 0.1500, lr_0 = 1.4028e-04
Loss = 1.1424e-03, PNorm = 172.4693, GNorm = 0.0974, lr_0 = 1.4018e-04
Loss = 2.7460e-03, PNorm = 172.4705, GNorm = 0.0355, lr_0 = 1.4009e-04
Loss = 9.5147e-04, PNorm = 172.4728, GNorm = 0.1088, lr_0 = 1.3999e-04
Loss = 9.7665e-04, PNorm = 172.4752, GNorm = 0.0720, lr_0 = 1.3990e-04
Loss = 2.1004e-03, PNorm = 172.4794, GNorm = 0.0586, lr_0 = 1.3980e-04
Loss = 1.9647e-03, PNorm = 172.4821, GNorm = 0.1816, lr_0 = 1.3970e-04
Loss = 1.0275e-03, PNorm = 172.4848, GNorm = 0.1009, lr_0 = 1.3961e-04
Loss = 2.1931e-03, PNorm = 172.4877, GNorm = 0.0562, lr_0 = 1.3951e-04
Loss = 1.4890e-03, PNorm = 172.4905, GNorm = 0.0825, lr_0 = 1.3942e-04
Loss = 1.2444e-03, PNorm = 172.4927, GNorm = 0.0607, lr_0 = 1.3932e-04
Loss = 2.3641e-03, PNorm = 172.4930, GNorm = 0.1078, lr_0 = 1.3923e-04
Loss = 8.4242e-04, PNorm = 172.4933, GNorm = 0.0754, lr_0 = 1.3913e-04
Loss = 3.1115e-03, PNorm = 172.4962, GNorm = 0.0718, lr_0 = 1.3904e-04
Loss = 1.3858e-03, PNorm = 172.5005, GNorm = 0.0923, lr_0 = 1.3894e-04
Validation mae = 0.277319
Epoch 26
Loss = 1.0651e-03, PNorm = 172.5043, GNorm = 0.0777, lr_0 = 1.3884e-04
Loss = 1.7885e-03, PNorm = 172.5078, GNorm = 0.1029, lr_0 = 1.3875e-04
Loss = 1.6853e-03, PNorm = 172.5082, GNorm = 0.1319, lr_0 = 1.3865e-04
Loss = 1.0678e-03, PNorm = 172.5095, GNorm = 0.0756, lr_0 = 1.3856e-04
Loss = 1.0953e-03, PNorm = 172.5099, GNorm = 0.0714, lr_0 = 1.3846e-04
Loss = 1.7808e-03, PNorm = 172.5109, GNorm = 0.1215, lr_0 = 1.3837e-04
Loss = 9.7832e-04, PNorm = 172.5129, GNorm = 0.1522, lr_0 = 1.3828e-04
Loss = 1.3658e-03, PNorm = 172.5154, GNorm = 0.1322, lr_0 = 1.3818e-04
Loss = 2.0296e-03, PNorm = 172.5169, GNorm = 0.1677, lr_0 = 1.3809e-04
Loss = 1.0124e-03, PNorm = 172.5179, GNorm = 0.0699, lr_0 = 1.3799e-04
Loss = 2.5130e-03, PNorm = 172.5196, GNorm = 0.3433, lr_0 = 1.3790e-04
Loss = 2.3498e-03, PNorm = 172.5209, GNorm = 0.1392, lr_0 = 1.3780e-04
Loss = 2.1573e-03, PNorm = 172.5239, GNorm = 0.0908, lr_0 = 1.3771e-04
Loss = 1.3093e-03, PNorm = 172.5249, GNorm = 0.0425, lr_0 = 1.3761e-04
Loss = 9.7730e-04, PNorm = 172.5268, GNorm = 0.1041, lr_0 = 1.3752e-04
Loss = 8.0741e-04, PNorm = 172.5287, GNorm = 0.1213, lr_0 = 1.3742e-04
Loss = 8.5402e-04, PNorm = 172.5297, GNorm = 0.0480, lr_0 = 1.3733e-04
Loss = 2.0362e-03, PNorm = 172.5311, GNorm = 0.0723, lr_0 = 1.3724e-04
Loss = 7.6443e-04, PNorm = 172.5322, GNorm = 0.0387, lr_0 = 1.3714e-04
Loss = 1.2122e-03, PNorm = 172.5337, GNorm = 0.0402, lr_0 = 1.3705e-04
Loss = 2.0572e-03, PNorm = 172.5348, GNorm = 0.1577, lr_0 = 1.3695e-04
Loss = 1.0218e-03, PNorm = 172.5363, GNorm = 0.1915, lr_0 = 1.3686e-04
Loss = 8.7229e-04, PNorm = 172.5382, GNorm = 0.0985, lr_0 = 1.3677e-04
Loss = 9.8825e-04, PNorm = 172.5394, GNorm = 0.0519, lr_0 = 1.3667e-04
Loss = 1.0187e-03, PNorm = 172.5413, GNorm = 0.1057, lr_0 = 1.3658e-04
Loss = 1.7628e-03, PNorm = 172.5425, GNorm = 0.1431, lr_0 = 1.3649e-04
Loss = 2.6227e-03, PNorm = 172.5448, GNorm = 0.2565, lr_0 = 1.3639e-04
Loss = 9.6351e-04, PNorm = 172.5477, GNorm = 0.0650, lr_0 = 1.3630e-04
Loss = 1.5021e-03, PNorm = 172.5488, GNorm = 0.1059, lr_0 = 1.3621e-04
Loss = 1.3687e-03, PNorm = 172.5503, GNorm = 0.0678, lr_0 = 1.3611e-04
Loss = 1.3889e-03, PNorm = 172.5517, GNorm = 0.0601, lr_0 = 1.3602e-04
Loss = 1.4536e-03, PNorm = 172.5533, GNorm = 0.0853, lr_0 = 1.3593e-04
Loss = 1.2971e-03, PNorm = 172.5561, GNorm = 0.1497, lr_0 = 1.3583e-04
Loss = 9.9924e-04, PNorm = 172.5587, GNorm = 0.0950, lr_0 = 1.3574e-04
Loss = 9.1380e-04, PNorm = 172.5609, GNorm = 0.0308, lr_0 = 1.3565e-04
Loss = 8.2088e-04, PNorm = 172.5621, GNorm = 0.1001, lr_0 = 1.3555e-04
Loss = 8.9412e-04, PNorm = 172.5634, GNorm = 0.0822, lr_0 = 1.3546e-04
Loss = 1.3606e-03, PNorm = 172.5652, GNorm = 0.0572, lr_0 = 1.3537e-04
Loss = 8.0743e-04, PNorm = 172.5672, GNorm = 0.0634, lr_0 = 1.3528e-04
Loss = 8.5041e-04, PNorm = 172.5692, GNorm = 0.0800, lr_0 = 1.3518e-04
Loss = 1.1150e-03, PNorm = 172.5709, GNorm = 0.0565, lr_0 = 1.3509e-04
Loss = 2.1986e-03, PNorm = 172.5719, GNorm = 0.0562, lr_0 = 1.3500e-04
Loss = 1.4348e-03, PNorm = 172.5737, GNorm = 0.0434, lr_0 = 1.3491e-04
Loss = 1.6528e-03, PNorm = 172.5757, GNorm = 0.5040, lr_0 = 1.3481e-04
Loss = 1.4787e-03, PNorm = 172.5768, GNorm = 0.0674, lr_0 = 1.3472e-04
Loss = 1.5688e-03, PNorm = 172.5781, GNorm = 0.1478, lr_0 = 1.3463e-04
Loss = 9.8580e-04, PNorm = 172.5805, GNorm = 0.0678, lr_0 = 1.3454e-04
Loss = 1.6541e-03, PNorm = 172.5839, GNorm = 0.0573, lr_0 = 1.3444e-04
Loss = 1.1150e-03, PNorm = 172.5872, GNorm = 0.1067, lr_0 = 1.3435e-04
Loss = 1.3331e-03, PNorm = 172.5894, GNorm = 0.1663, lr_0 = 1.3426e-04
Loss = 8.4843e-04, PNorm = 172.5924, GNorm = 0.0557, lr_0 = 1.3417e-04
Loss = 1.4868e-03, PNorm = 172.5933, GNorm = 0.0461, lr_0 = 1.3408e-04
Loss = 2.1141e-03, PNorm = 172.5955, GNorm = 0.1660, lr_0 = 1.3398e-04
Loss = 2.0034e-03, PNorm = 172.5974, GNorm = 0.1277, lr_0 = 1.3389e-04
Loss = 7.8258e-04, PNorm = 172.5989, GNorm = 0.1459, lr_0 = 1.3380e-04
Loss = 1.0050e-03, PNorm = 172.6011, GNorm = 0.0221, lr_0 = 1.3371e-04
Loss = 7.2778e-04, PNorm = 172.6027, GNorm = 0.0438, lr_0 = 1.3362e-04
Loss = 1.4149e-03, PNorm = 172.6046, GNorm = 0.0729, lr_0 = 1.3353e-04
Loss = 8.6410e-04, PNorm = 172.6056, GNorm = 0.0797, lr_0 = 1.3343e-04
Loss = 1.3786e-03, PNorm = 172.6073, GNorm = 0.0784, lr_0 = 1.3334e-04
Loss = 9.1741e-04, PNorm = 172.6100, GNorm = 0.0625, lr_0 = 1.3325e-04
Loss = 2.5400e-03, PNorm = 172.6117, GNorm = 0.0669, lr_0 = 1.3316e-04
Loss = 5.6520e-03, PNorm = 172.6145, GNorm = 0.4545, lr_0 = 1.3307e-04
Loss = 1.7981e-03, PNorm = 172.6167, GNorm = 0.1926, lr_0 = 1.3298e-04
Loss = 1.1307e-03, PNorm = 172.6196, GNorm = 0.0342, lr_0 = 1.3289e-04
Loss = 1.1706e-03, PNorm = 172.6221, GNorm = 0.0744, lr_0 = 1.3280e-04
Loss = 1.6443e-03, PNorm = 172.6234, GNorm = 0.1503, lr_0 = 1.3270e-04
Loss = 9.3285e-04, PNorm = 172.6258, GNorm = 0.0958, lr_0 = 1.3261e-04
Loss = 1.1380e-03, PNorm = 172.6275, GNorm = 0.1446, lr_0 = 1.3252e-04
Loss = 1.8295e-03, PNorm = 172.6298, GNorm = 0.0675, lr_0 = 1.3243e-04
Loss = 3.1936e-03, PNorm = 172.6305, GNorm = 0.0432, lr_0 = 1.3234e-04
Loss = 2.0160e-03, PNorm = 172.6314, GNorm = 0.0994, lr_0 = 1.3225e-04
Loss = 1.5628e-03, PNorm = 172.6335, GNorm = 0.1382, lr_0 = 1.3216e-04
Loss = 9.5754e-04, PNorm = 172.6361, GNorm = 0.0302, lr_0 = 1.3207e-04
Loss = 1.5185e-03, PNorm = 172.6373, GNorm = 0.0984, lr_0 = 1.3198e-04
Loss = 2.4987e-03, PNorm = 172.6399, GNorm = 0.0420, lr_0 = 1.3189e-04
Loss = 1.4624e-03, PNorm = 172.6417, GNorm = 0.1723, lr_0 = 1.3180e-04
Loss = 2.3111e-03, PNorm = 172.6434, GNorm = 0.1118, lr_0 = 1.3171e-04
Loss = 3.0459e-03, PNorm = 172.6454, GNorm = 0.1107, lr_0 = 1.3162e-04
Loss = 2.1500e-03, PNorm = 172.6471, GNorm = 0.0589, lr_0 = 1.3153e-04
Loss = 9.2319e-04, PNorm = 172.6483, GNorm = 0.1308, lr_0 = 1.3144e-04
Loss = 1.6761e-03, PNorm = 172.6485, GNorm = 0.1611, lr_0 = 1.3135e-04
Loss = 1.2193e-03, PNorm = 172.6493, GNorm = 0.2537, lr_0 = 1.3126e-04
Loss = 8.5977e-04, PNorm = 172.6517, GNorm = 0.1883, lr_0 = 1.3117e-04
Loss = 4.0073e-03, PNorm = 172.6542, GNorm = 0.1064, lr_0 = 1.3108e-04
Loss = 2.9804e-03, PNorm = 172.6567, GNorm = 0.0921, lr_0 = 1.3099e-04
Loss = 2.6114e-03, PNorm = 172.6587, GNorm = 0.0652, lr_0 = 1.3090e-04
Loss = 1.4351e-03, PNorm = 172.6610, GNorm = 0.0690, lr_0 = 1.3081e-04
Loss = 8.6088e-04, PNorm = 172.6623, GNorm = 0.0390, lr_0 = 1.3072e-04
Loss = 2.3773e-03, PNorm = 172.6645, GNorm = 0.0714, lr_0 = 1.3063e-04
Loss = 1.2852e-03, PNorm = 172.6656, GNorm = 0.0322, lr_0 = 1.3054e-04
Loss = 1.0128e-03, PNorm = 172.6679, GNorm = 0.0356, lr_0 = 1.3045e-04
Loss = 1.8781e-03, PNorm = 172.6696, GNorm = 0.1552, lr_0 = 1.3036e-04
Loss = 1.9867e-03, PNorm = 172.6714, GNorm = 0.0640, lr_0 = 1.3027e-04
Loss = 2.1061e-03, PNorm = 172.6730, GNorm = 0.1177, lr_0 = 1.3018e-04
Loss = 1.4599e-03, PNorm = 172.6748, GNorm = 0.0893, lr_0 = 1.3009e-04
Loss = 1.0587e-03, PNorm = 172.6771, GNorm = 0.0495, lr_0 = 1.3000e-04
Loss = 1.2173e-03, PNorm = 172.6792, GNorm = 0.0906, lr_0 = 1.2992e-04
Loss = 3.1959e-03, PNorm = 172.6807, GNorm = 0.2128, lr_0 = 1.2983e-04
Loss = 9.0289e-04, PNorm = 172.6846, GNorm = 0.0573, lr_0 = 1.2974e-04
Loss = 1.0494e-03, PNorm = 172.6860, GNorm = 0.1284, lr_0 = 1.2965e-04
Loss = 1.9719e-03, PNorm = 172.6888, GNorm = 0.2030, lr_0 = 1.2956e-04
Loss = 1.8609e-03, PNorm = 172.6922, GNorm = 0.0565, lr_0 = 1.2947e-04
Loss = 1.2482e-03, PNorm = 172.6951, GNorm = 0.0450, lr_0 = 1.2938e-04
Loss = 1.3282e-03, PNorm = 172.6974, GNorm = 0.1095, lr_0 = 1.2929e-04
Loss = 1.1591e-03, PNorm = 172.7003, GNorm = 0.0960, lr_0 = 1.2921e-04
Loss = 8.8665e-04, PNorm = 172.7024, GNorm = 0.0742, lr_0 = 1.2912e-04
Loss = 1.2869e-03, PNorm = 172.7035, GNorm = 0.0915, lr_0 = 1.2903e-04
Loss = 1.2651e-03, PNorm = 172.7045, GNorm = 0.1980, lr_0 = 1.2894e-04
Loss = 7.9361e-03, PNorm = 172.7060, GNorm = 0.2082, lr_0 = 1.2885e-04
Loss = 7.5104e-04, PNorm = 172.7092, GNorm = 0.1131, lr_0 = 1.2876e-04
Loss = 1.1374e-03, PNorm = 172.7117, GNorm = 0.0932, lr_0 = 1.2867e-04
Loss = 1.2739e-03, PNorm = 172.7124, GNorm = 0.1659, lr_0 = 1.2859e-04
Loss = 9.0306e-04, PNorm = 172.7136, GNorm = 0.2086, lr_0 = 1.2850e-04
Loss = 1.9286e-03, PNorm = 172.7144, GNorm = 0.3286, lr_0 = 1.2841e-04
Loss = 1.2319e-03, PNorm = 172.7154, GNorm = 0.1174, lr_0 = 1.2832e-04
Loss = 2.2606e-03, PNorm = 172.7167, GNorm = 0.1091, lr_0 = 1.2823e-04
Loss = 7.8804e-04, PNorm = 172.7187, GNorm = 0.0774, lr_0 = 1.2815e-04
Loss = 8.9895e-04, PNorm = 172.7204, GNorm = 0.0507, lr_0 = 1.2806e-04
Loss = 1.1578e-03, PNorm = 172.7220, GNorm = 0.2869, lr_0 = 1.2797e-04
Validation mae = 0.277474
Epoch 27
Loss = 8.2959e-04, PNorm = 172.7225, GNorm = 0.1371, lr_0 = 1.2788e-04
Loss = 7.8868e-04, PNorm = 172.7240, GNorm = 0.0862, lr_0 = 1.2780e-04
Loss = 7.3620e-04, PNorm = 172.7258, GNorm = 0.0910, lr_0 = 1.2771e-04
Loss = 7.0961e-04, PNorm = 172.7280, GNorm = 0.1671, lr_0 = 1.2762e-04
Loss = 2.4134e-03, PNorm = 172.7291, GNorm = 0.1190, lr_0 = 1.2753e-04
Loss = 1.1728e-03, PNorm = 172.7296, GNorm = 0.0907, lr_0 = 1.2745e-04
Loss = 1.5191e-03, PNorm = 172.7304, GNorm = 0.0781, lr_0 = 1.2736e-04
Loss = 1.1748e-03, PNorm = 172.7302, GNorm = 0.0420, lr_0 = 1.2727e-04
Loss = 1.4073e-03, PNorm = 172.7315, GNorm = 0.1275, lr_0 = 1.2718e-04
Loss = 1.6930e-03, PNorm = 172.7323, GNorm = 0.1118, lr_0 = 1.2710e-04
Loss = 2.1484e-03, PNorm = 172.7349, GNorm = 0.1161, lr_0 = 1.2701e-04
Loss = 1.5095e-03, PNorm = 172.7365, GNorm = 0.2269, lr_0 = 1.2692e-04
Loss = 6.8498e-04, PNorm = 172.7382, GNorm = 0.0905, lr_0 = 1.2684e-04
Loss = 2.1911e-03, PNorm = 172.7396, GNorm = 0.0305, lr_0 = 1.2675e-04
Loss = 8.7244e-04, PNorm = 172.7410, GNorm = 0.1167, lr_0 = 1.2666e-04
Loss = 1.4830e-03, PNorm = 172.7420, GNorm = 0.0662, lr_0 = 1.2658e-04
Loss = 1.0070e-03, PNorm = 172.7431, GNorm = 0.0525, lr_0 = 1.2649e-04
Loss = 5.6264e-04, PNorm = 172.7443, GNorm = 0.0497, lr_0 = 1.2640e-04
Loss = 1.4580e-03, PNorm = 172.7462, GNorm = 0.1353, lr_0 = 1.2632e-04
Loss = 8.1087e-04, PNorm = 172.7469, GNorm = 0.1017, lr_0 = 1.2623e-04
Loss = 2.6780e-03, PNorm = 172.7486, GNorm = 0.3124, lr_0 = 1.2614e-04
Loss = 1.0649e-03, PNorm = 172.7499, GNorm = 0.0837, lr_0 = 1.2606e-04
Loss = 1.8558e-03, PNorm = 172.7510, GNorm = 0.1817, lr_0 = 1.2597e-04
Loss = 7.5418e-04, PNorm = 172.7525, GNorm = 0.2130, lr_0 = 1.2588e-04
Loss = 1.1696e-03, PNorm = 172.7533, GNorm = 0.0547, lr_0 = 1.2580e-04
Loss = 1.1186e-03, PNorm = 172.7537, GNorm = 0.0776, lr_0 = 1.2571e-04
Loss = 1.5915e-03, PNorm = 172.7558, GNorm = 0.0396, lr_0 = 1.2563e-04
Loss = 1.0144e-03, PNorm = 172.7581, GNorm = 0.0362, lr_0 = 1.2554e-04
Loss = 2.9683e-03, PNorm = 172.7595, GNorm = 0.1520, lr_0 = 1.2545e-04
Loss = 7.3614e-04, PNorm = 172.7596, GNorm = 0.0467, lr_0 = 1.2537e-04
Loss = 1.6904e-03, PNorm = 172.7603, GNorm = 0.1631, lr_0 = 1.2528e-04
Loss = 1.6536e-03, PNorm = 172.7616, GNorm = 0.0914, lr_0 = 1.2520e-04
Loss = 8.4071e-04, PNorm = 172.7626, GNorm = 0.0572, lr_0 = 1.2511e-04
Loss = 9.4397e-04, PNorm = 172.7643, GNorm = 0.2922, lr_0 = 1.2502e-04
Loss = 8.7717e-04, PNorm = 172.7650, GNorm = 0.1003, lr_0 = 1.2494e-04
Loss = 6.4492e-04, PNorm = 172.7678, GNorm = 0.0843, lr_0 = 1.2485e-04
Loss = 1.1920e-03, PNorm = 172.7688, GNorm = 0.0712, lr_0 = 1.2477e-04
Loss = 1.6976e-03, PNorm = 172.7709, GNorm = 0.1452, lr_0 = 1.2468e-04
Loss = 3.0201e-03, PNorm = 172.7719, GNorm = 0.1283, lr_0 = 1.2460e-04
Loss = 1.4409e-03, PNorm = 172.7737, GNorm = 0.0993, lr_0 = 1.2451e-04
Loss = 9.8756e-04, PNorm = 172.7754, GNorm = 0.1080, lr_0 = 1.2443e-04
Loss = 1.8199e-03, PNorm = 172.7768, GNorm = 0.1450, lr_0 = 1.2434e-04
Loss = 9.3927e-04, PNorm = 172.7779, GNorm = 0.1047, lr_0 = 1.2426e-04
Loss = 1.5152e-03, PNorm = 172.7795, GNorm = 0.0529, lr_0 = 1.2417e-04
Loss = 6.1856e-04, PNorm = 172.7811, GNorm = 0.1510, lr_0 = 1.2409e-04
Loss = 1.3385e-03, PNorm = 172.7826, GNorm = 0.1550, lr_0 = 1.2400e-04
Loss = 1.5989e-03, PNorm = 172.7836, GNorm = 0.0654, lr_0 = 1.2392e-04
Loss = 6.6039e-04, PNorm = 172.7866, GNorm = 0.1220, lr_0 = 1.2383e-04
Loss = 1.4749e-03, PNorm = 172.7887, GNorm = 0.1727, lr_0 = 1.2375e-04
Loss = 3.1723e-03, PNorm = 172.7926, GNorm = 0.1366, lr_0 = 1.2366e-04
Loss = 7.5900e-04, PNorm = 172.7946, GNorm = 0.0945, lr_0 = 1.2358e-04
Loss = 1.7907e-03, PNorm = 172.7970, GNorm = 0.1632, lr_0 = 1.2349e-04
Loss = 1.0041e-03, PNorm = 172.7985, GNorm = 0.0445, lr_0 = 1.2341e-04
Loss = 1.3010e-03, PNorm = 172.7997, GNorm = 0.0869, lr_0 = 1.2332e-04
Loss = 5.3327e-04, PNorm = 172.8002, GNorm = 0.1235, lr_0 = 1.2324e-04
Loss = 9.9315e-04, PNorm = 172.8009, GNorm = 0.1029, lr_0 = 1.2315e-04
Loss = 6.7821e-04, PNorm = 172.8007, GNorm = 0.0746, lr_0 = 1.2307e-04
Loss = 9.4762e-04, PNorm = 172.8001, GNorm = 0.1006, lr_0 = 1.2298e-04
Loss = 1.2335e-03, PNorm = 172.8015, GNorm = 0.1113, lr_0 = 1.2290e-04
Loss = 8.3285e-04, PNorm = 172.8032, GNorm = 0.0901, lr_0 = 1.2282e-04
Loss = 6.5384e-04, PNorm = 172.8041, GNorm = 0.0990, lr_0 = 1.2273e-04
Loss = 7.3682e-04, PNorm = 172.8050, GNorm = 0.0693, lr_0 = 1.2265e-04
Loss = 3.3971e-03, PNorm = 172.8073, GNorm = 0.1783, lr_0 = 1.2256e-04
Loss = 1.6553e-03, PNorm = 172.8102, GNorm = 0.0997, lr_0 = 1.2248e-04
Loss = 6.6037e-04, PNorm = 172.8119, GNorm = 0.1153, lr_0 = 1.2240e-04
Loss = 6.7224e-04, PNorm = 172.8128, GNorm = 0.0800, lr_0 = 1.2231e-04
Loss = 1.6734e-03, PNorm = 172.8151, GNorm = 0.1360, lr_0 = 1.2223e-04
Loss = 1.9422e-03, PNorm = 172.8161, GNorm = 0.1709, lr_0 = 1.2214e-04
Loss = 7.9795e-04, PNorm = 172.8186, GNorm = 0.1489, lr_0 = 1.2206e-04
Loss = 7.8237e-04, PNorm = 172.8194, GNorm = 0.0651, lr_0 = 1.2198e-04
Loss = 2.6303e-03, PNorm = 172.8208, GNorm = 0.4126, lr_0 = 1.2189e-04
Loss = 1.1710e-03, PNorm = 172.8214, GNorm = 0.0722, lr_0 = 1.2181e-04
Loss = 7.3900e-04, PNorm = 172.8228, GNorm = 0.0254, lr_0 = 1.2173e-04
Loss = 7.8577e-04, PNorm = 172.8253, GNorm = 0.0723, lr_0 = 1.2164e-04
Loss = 1.1653e-03, PNorm = 172.8264, GNorm = 0.0581, lr_0 = 1.2156e-04
Loss = 1.2124e-03, PNorm = 172.8280, GNorm = 0.1233, lr_0 = 1.2148e-04
Loss = 2.1355e-03, PNorm = 172.8291, GNorm = 0.0360, lr_0 = 1.2139e-04
Loss = 1.5534e-03, PNorm = 172.8302, GNorm = 0.0412, lr_0 = 1.2131e-04
Loss = 5.8975e-03, PNorm = 172.8291, GNorm = 0.3102, lr_0 = 1.2123e-04
Loss = 7.2310e-04, PNorm = 172.8306, GNorm = 0.1857, lr_0 = 1.2114e-04
Loss = 2.8376e-03, PNorm = 172.8331, GNorm = 0.5451, lr_0 = 1.2106e-04
Loss = 1.3876e-03, PNorm = 172.8368, GNorm = 0.0477, lr_0 = 1.2098e-04
Loss = 1.2371e-03, PNorm = 172.8378, GNorm = 0.1121, lr_0 = 1.2090e-04
Loss = 1.9985e-03, PNorm = 172.8398, GNorm = 0.1238, lr_0 = 1.2081e-04
Loss = 1.0949e-03, PNorm = 172.8411, GNorm = 0.0246, lr_0 = 1.2073e-04
Loss = 1.4109e-03, PNorm = 172.8442, GNorm = 0.0263, lr_0 = 1.2065e-04
Loss = 7.7752e-04, PNorm = 172.8465, GNorm = 0.0984, lr_0 = 1.2056e-04
Loss = 1.5231e-03, PNorm = 172.8502, GNorm = 0.0887, lr_0 = 1.2048e-04
Loss = 1.0814e-03, PNorm = 172.8517, GNorm = 0.1049, lr_0 = 1.2040e-04
Loss = 9.7040e-04, PNorm = 172.8521, GNorm = 0.0937, lr_0 = 1.2032e-04
Loss = 2.8373e-03, PNorm = 172.8536, GNorm = 0.1990, lr_0 = 1.2023e-04
Loss = 8.4534e-04, PNorm = 172.8545, GNorm = 0.1424, lr_0 = 1.2015e-04
Loss = 1.0821e-03, PNorm = 172.8560, GNorm = 0.1430, lr_0 = 1.2007e-04
Loss = 1.1667e-03, PNorm = 172.8568, GNorm = 0.1078, lr_0 = 1.1999e-04
Loss = 1.8387e-03, PNorm = 172.8583, GNorm = 0.0925, lr_0 = 1.1991e-04
Loss = 2.2839e-03, PNorm = 172.8599, GNorm = 0.1038, lr_0 = 1.1982e-04
Loss = 8.2363e-04, PNorm = 172.8626, GNorm = 0.0929, lr_0 = 1.1974e-04
Loss = 3.5043e-03, PNorm = 172.8641, GNorm = 0.1560, lr_0 = 1.1966e-04
Loss = 8.3594e-04, PNorm = 172.8655, GNorm = 0.1306, lr_0 = 1.1958e-04
Loss = 8.8191e-04, PNorm = 172.8676, GNorm = 0.1102, lr_0 = 1.1950e-04
Loss = 6.1077e-04, PNorm = 172.8692, GNorm = 0.1729, lr_0 = 1.1941e-04
Loss = 7.0375e-04, PNorm = 172.8716, GNorm = 0.1304, lr_0 = 1.1933e-04
Loss = 1.8024e-03, PNorm = 172.8716, GNorm = 0.0763, lr_0 = 1.1925e-04
Loss = 8.1184e-04, PNorm = 172.8734, GNorm = 0.0370, lr_0 = 1.1917e-04
Loss = 7.3947e-04, PNorm = 172.8744, GNorm = 0.0797, lr_0 = 1.1909e-04
Loss = 2.5916e-03, PNorm = 172.8753, GNorm = 0.1066, lr_0 = 1.1901e-04
Loss = 1.1983e-03, PNorm = 172.8763, GNorm = 0.1437, lr_0 = 1.1892e-04
Loss = 3.5882e-03, PNorm = 172.8766, GNorm = 0.0631, lr_0 = 1.1884e-04
Loss = 9.8588e-04, PNorm = 172.8782, GNorm = 0.0403, lr_0 = 1.1876e-04
Loss = 6.9968e-04, PNorm = 172.8807, GNorm = 0.0816, lr_0 = 1.1868e-04
Loss = 1.2339e-03, PNorm = 172.8836, GNorm = 0.0901, lr_0 = 1.1860e-04
Loss = 5.8754e-04, PNorm = 172.8854, GNorm = 0.0646, lr_0 = 1.1852e-04
Loss = 8.9741e-04, PNorm = 172.8869, GNorm = 0.0909, lr_0 = 1.1844e-04
Loss = 1.1995e-03, PNorm = 172.8868, GNorm = 0.0373, lr_0 = 1.1835e-04
Loss = 1.9718e-03, PNorm = 172.8873, GNorm = 0.0368, lr_0 = 1.1827e-04
Loss = 2.9172e-03, PNorm = 172.8906, GNorm = 0.0760, lr_0 = 1.1819e-04
Loss = 1.7757e-03, PNorm = 172.8937, GNorm = 0.2147, lr_0 = 1.1811e-04
Loss = 1.1607e-03, PNorm = 172.8973, GNorm = 0.1852, lr_0 = 1.1803e-04
Loss = 9.1408e-04, PNorm = 172.9000, GNorm = 0.0369, lr_0 = 1.1795e-04
Loss = 2.3656e-03, PNorm = 172.9016, GNorm = 0.0391, lr_0 = 1.1787e-04
Validation mae = 0.277446
Epoch 28
Loss = 1.1985e-03, PNorm = 172.9034, GNorm = 0.0835, lr_0 = 1.1779e-04
Loss = 1.6523e-03, PNorm = 172.9040, GNorm = 0.1090, lr_0 = 1.1771e-04
Loss = 8.2783e-04, PNorm = 172.9063, GNorm = 0.1463, lr_0 = 1.1763e-04
Loss = 1.1037e-03, PNorm = 172.9072, GNorm = 0.0732, lr_0 = 1.1755e-04
Loss = 7.4858e-04, PNorm = 172.9089, GNorm = 0.0251, lr_0 = 1.1747e-04
Loss = 1.1740e-03, PNorm = 172.9084, GNorm = 0.0970, lr_0 = 1.1739e-04
Loss = 8.0885e-04, PNorm = 172.9076, GNorm = 0.0386, lr_0 = 1.1730e-04
Loss = 6.8211e-04, PNorm = 172.9086, GNorm = 0.0586, lr_0 = 1.1722e-04
Loss = 1.3092e-03, PNorm = 172.9116, GNorm = 0.2449, lr_0 = 1.1714e-04
Loss = 6.1857e-04, PNorm = 172.9140, GNorm = 0.1062, lr_0 = 1.1706e-04
Loss = 1.6775e-03, PNorm = 172.9170, GNorm = 0.0978, lr_0 = 1.1698e-04
Loss = 8.0658e-04, PNorm = 172.9172, GNorm = 0.0939, lr_0 = 1.1690e-04
Loss = 1.3428e-03, PNorm = 172.9177, GNorm = 0.0727, lr_0 = 1.1682e-04
Loss = 1.6969e-03, PNorm = 172.9185, GNorm = 0.0930, lr_0 = 1.1674e-04
Loss = 9.4754e-04, PNorm = 172.9205, GNorm = 0.0434, lr_0 = 1.1666e-04
Loss = 2.1114e-03, PNorm = 172.9215, GNorm = 0.0702, lr_0 = 1.1658e-04
Loss = 5.1408e-04, PNorm = 172.9223, GNorm = 0.0532, lr_0 = 1.1650e-04
Loss = 7.1004e-04, PNorm = 172.9231, GNorm = 0.0625, lr_0 = 1.1642e-04
Loss = 1.1255e-03, PNorm = 172.9239, GNorm = 0.1049, lr_0 = 1.1634e-04
Loss = 2.9137e-03, PNorm = 172.9254, GNorm = 0.1978, lr_0 = 1.1626e-04
Loss = 6.6450e-04, PNorm = 172.9278, GNorm = 0.0883, lr_0 = 1.1618e-04
Loss = 8.7942e-04, PNorm = 172.9294, GNorm = 0.0457, lr_0 = 1.1611e-04
Loss = 8.3001e-04, PNorm = 172.9300, GNorm = 0.0523, lr_0 = 1.1603e-04
Loss = 7.8634e-04, PNorm = 172.9308, GNorm = 0.0308, lr_0 = 1.1595e-04
Loss = 2.0328e-03, PNorm = 172.9312, GNorm = 0.1617, lr_0 = 1.1587e-04
Loss = 5.9314e-04, PNorm = 172.9323, GNorm = 0.1130, lr_0 = 1.1579e-04
Loss = 1.1275e-03, PNorm = 172.9340, GNorm = 0.0877, lr_0 = 1.1571e-04
Loss = 1.1117e-03, PNorm = 172.9349, GNorm = 0.0724, lr_0 = 1.1563e-04
Loss = 2.8225e-03, PNorm = 172.9375, GNorm = 0.0803, lr_0 = 1.1555e-04
Loss = 6.8713e-04, PNorm = 172.9378, GNorm = 0.0804, lr_0 = 1.1547e-04
Loss = 6.4765e-04, PNorm = 172.9384, GNorm = 0.0747, lr_0 = 1.1539e-04
Loss = 1.2238e-03, PNorm = 172.9386, GNorm = 0.0332, lr_0 = 1.1531e-04
Loss = 1.2022e-03, PNorm = 172.9400, GNorm = 0.1017, lr_0 = 1.1523e-04
Loss = 1.2927e-03, PNorm = 172.9415, GNorm = 0.2367, lr_0 = 1.1515e-04
Loss = 6.7197e-04, PNorm = 172.9432, GNorm = 0.0623, lr_0 = 1.1508e-04
Loss = 7.5737e-04, PNorm = 172.9445, GNorm = 0.0297, lr_0 = 1.1500e-04
Loss = 1.1566e-03, PNorm = 172.9448, GNorm = 0.0684, lr_0 = 1.1492e-04
Loss = 1.0043e-03, PNorm = 172.9451, GNorm = 0.0661, lr_0 = 1.1484e-04
Loss = 9.5237e-04, PNorm = 172.9463, GNorm = 0.0437, lr_0 = 1.1476e-04
Loss = 6.1999e-04, PNorm = 172.9477, GNorm = 0.2765, lr_0 = 1.1468e-04
Loss = 1.0552e-03, PNorm = 172.9489, GNorm = 0.1176, lr_0 = 1.1460e-04
Loss = 7.5041e-04, PNorm = 172.9507, GNorm = 0.1121, lr_0 = 1.1452e-04
Loss = 1.0436e-03, PNorm = 172.9521, GNorm = 0.1222, lr_0 = 1.1445e-04
Loss = 2.2276e-03, PNorm = 172.9541, GNorm = 0.0404, lr_0 = 1.1437e-04
Loss = 9.6815e-04, PNorm = 172.9556, GNorm = 0.1244, lr_0 = 1.1429e-04
Loss = 1.0524e-03, PNorm = 172.9565, GNorm = 0.0364, lr_0 = 1.1421e-04
Loss = 1.0153e-03, PNorm = 172.9580, GNorm = 0.0554, lr_0 = 1.1413e-04
Loss = 1.7431e-03, PNorm = 172.9593, GNorm = 0.1496, lr_0 = 1.1405e-04
Loss = 1.3043e-03, PNorm = 172.9617, GNorm = 0.0386, lr_0 = 1.1398e-04
Loss = 6.8658e-04, PNorm = 172.9644, GNorm = 0.0932, lr_0 = 1.1390e-04
Loss = 1.2077e-03, PNorm = 172.9658, GNorm = 0.0688, lr_0 = 1.1382e-04
Loss = 2.5299e-03, PNorm = 172.9661, GNorm = 0.0790, lr_0 = 1.1374e-04
Loss = 6.3178e-04, PNorm = 172.9665, GNorm = 0.0565, lr_0 = 1.1366e-04
Loss = 8.7660e-04, PNorm = 172.9672, GNorm = 0.1010, lr_0 = 1.1359e-04
Loss = 1.6633e-03, PNorm = 172.9688, GNorm = 0.0641, lr_0 = 1.1351e-04
Loss = 2.0988e-03, PNorm = 172.9706, GNorm = 0.0502, lr_0 = 1.1343e-04
Loss = 1.0335e-03, PNorm = 172.9718, GNorm = 0.0585, lr_0 = 1.1335e-04
Loss = 5.5907e-04, PNorm = 172.9720, GNorm = 0.0564, lr_0 = 1.1328e-04
Loss = 8.2839e-04, PNorm = 172.9745, GNorm = 0.0549, lr_0 = 1.1320e-04
Loss = 1.9266e-03, PNorm = 172.9748, GNorm = 0.3829, lr_0 = 1.1312e-04
Loss = 1.1323e-03, PNorm = 172.9767, GNorm = 0.0539, lr_0 = 1.1304e-04
Loss = 1.4284e-03, PNorm = 172.9785, GNorm = 0.0748, lr_0 = 1.1297e-04
Loss = 8.8737e-04, PNorm = 172.9798, GNorm = 0.0543, lr_0 = 1.1289e-04
Loss = 2.7301e-03, PNorm = 172.9806, GNorm = 0.1793, lr_0 = 1.1281e-04
Loss = 2.5273e-03, PNorm = 172.9816, GNorm = 0.3256, lr_0 = 1.1273e-04
Loss = 2.7750e-03, PNorm = 172.9838, GNorm = 0.2343, lr_0 = 1.1266e-04
Loss = 9.7663e-04, PNorm = 172.9855, GNorm = 0.0888, lr_0 = 1.1258e-04
Loss = 8.5797e-04, PNorm = 172.9875, GNorm = 0.1351, lr_0 = 1.1250e-04
Loss = 3.6088e-03, PNorm = 172.9891, GNorm = 0.1201, lr_0 = 1.1243e-04
Loss = 1.2020e-03, PNorm = 172.9897, GNorm = 0.0811, lr_0 = 1.1235e-04
Loss = 1.4091e-03, PNorm = 172.9914, GNorm = 0.0806, lr_0 = 1.1227e-04
Loss = 1.5452e-03, PNorm = 172.9922, GNorm = 0.0980, lr_0 = 1.1219e-04
Loss = 2.0090e-03, PNorm = 172.9938, GNorm = 0.1363, lr_0 = 1.1212e-04
Loss = 1.8946e-03, PNorm = 172.9956, GNorm = 0.1437, lr_0 = 1.1204e-04
Loss = 1.5433e-03, PNorm = 172.9989, GNorm = 0.1490, lr_0 = 1.1196e-04
Loss = 9.7892e-04, PNorm = 173.0000, GNorm = 0.0732, lr_0 = 1.1189e-04
Loss = 1.2329e-03, PNorm = 173.0023, GNorm = 0.0865, lr_0 = 1.1181e-04
Loss = 2.2647e-03, PNorm = 173.0044, GNorm = 0.0854, lr_0 = 1.1173e-04
Loss = 2.6522e-03, PNorm = 173.0060, GNorm = 0.1157, lr_0 = 1.1166e-04
Loss = 2.6686e-03, PNorm = 173.0070, GNorm = 0.0436, lr_0 = 1.1158e-04
Loss = 8.8048e-04, PNorm = 173.0065, GNorm = 0.0556, lr_0 = 1.1150e-04
Loss = 7.8245e-04, PNorm = 173.0076, GNorm = 0.1011, lr_0 = 1.1143e-04
Loss = 2.3762e-03, PNorm = 173.0088, GNorm = 0.0752, lr_0 = 1.1135e-04
Loss = 7.3151e-04, PNorm = 173.0108, GNorm = 0.2005, lr_0 = 1.1128e-04
Loss = 8.0276e-04, PNorm = 173.0131, GNorm = 0.0602, lr_0 = 1.1120e-04
Loss = 8.7979e-04, PNorm = 173.0136, GNorm = 0.1439, lr_0 = 1.1112e-04
Loss = 1.2100e-03, PNorm = 173.0142, GNorm = 0.1409, lr_0 = 1.1105e-04
Loss = 3.7361e-03, PNorm = 173.0157, GNorm = 0.2928, lr_0 = 1.1097e-04
Loss = 5.3483e-04, PNorm = 173.0175, GNorm = 0.0679, lr_0 = 1.1089e-04
Loss = 5.4147e-04, PNorm = 173.0192, GNorm = 0.0879, lr_0 = 1.1082e-04
Loss = 5.9210e-04, PNorm = 173.0215, GNorm = 0.0889, lr_0 = 1.1074e-04
Loss = 8.4165e-04, PNorm = 173.0232, GNorm = 0.1651, lr_0 = 1.1067e-04
Loss = 1.2063e-03, PNorm = 173.0242, GNorm = 0.0358, lr_0 = 1.1059e-04
Loss = 2.2622e-03, PNorm = 173.0250, GNorm = 0.1185, lr_0 = 1.1052e-04
Loss = 1.0099e-03, PNorm = 173.0260, GNorm = 0.0309, lr_0 = 1.1044e-04
Loss = 5.1993e-04, PNorm = 173.0272, GNorm = 0.1040, lr_0 = 1.1036e-04
Loss = 1.2131e-03, PNorm = 173.0277, GNorm = 0.0509, lr_0 = 1.1029e-04
Loss = 5.8153e-04, PNorm = 173.0290, GNorm = 0.0698, lr_0 = 1.1021e-04
Loss = 1.9475e-03, PNorm = 173.0309, GNorm = 0.0394, lr_0 = 1.1014e-04
Loss = 7.8173e-04, PNorm = 173.0323, GNorm = 0.1377, lr_0 = 1.1006e-04
Loss = 2.9248e-03, PNorm = 173.0330, GNorm = 0.1182, lr_0 = 1.0999e-04
Loss = 8.4234e-04, PNorm = 173.0336, GNorm = 0.1233, lr_0 = 1.0991e-04
Loss = 5.4309e-04, PNorm = 173.0348, GNorm = 0.1238, lr_0 = 1.0984e-04
Loss = 1.7129e-03, PNorm = 173.0363, GNorm = 0.0621, lr_0 = 1.0976e-04
Loss = 8.7755e-04, PNorm = 173.0371, GNorm = 0.0657, lr_0 = 1.0969e-04
Loss = 2.3728e-03, PNorm = 173.0399, GNorm = 0.1210, lr_0 = 1.0961e-04
Loss = 9.2819e-04, PNorm = 173.0413, GNorm = 0.1675, lr_0 = 1.0954e-04
Loss = 5.4185e-04, PNorm = 173.0408, GNorm = 0.1340, lr_0 = 1.0946e-04
Loss = 1.1980e-03, PNorm = 173.0417, GNorm = 0.0896, lr_0 = 1.0939e-04
Loss = 9.8978e-04, PNorm = 173.0424, GNorm = 0.1550, lr_0 = 1.0931e-04
Loss = 9.1842e-04, PNorm = 173.0447, GNorm = 0.0371, lr_0 = 1.0924e-04
Loss = 1.7369e-03, PNorm = 173.0467, GNorm = 0.1979, lr_0 = 1.0916e-04
Loss = 5.5704e-04, PNorm = 173.0479, GNorm = 0.0359, lr_0 = 1.0909e-04
Loss = 1.1328e-03, PNorm = 173.0500, GNorm = 0.0690, lr_0 = 1.0901e-04
Loss = 6.9867e-04, PNorm = 173.0511, GNorm = 0.0549, lr_0 = 1.0894e-04
Loss = 2.3141e-03, PNorm = 173.0528, GNorm = 0.1363, lr_0 = 1.0886e-04
Loss = 1.2525e-03, PNorm = 173.0528, GNorm = 0.1566, lr_0 = 1.0879e-04
Loss = 7.6431e-04, PNorm = 173.0543, GNorm = 0.0964, lr_0 = 1.0871e-04
Loss = 1.1578e-03, PNorm = 173.0547, GNorm = 0.0698, lr_0 = 1.0864e-04
Loss = 9.6089e-04, PNorm = 173.0568, GNorm = 0.0983, lr_0 = 1.0856e-04
Validation mae = 0.277199
Epoch 29
Loss = 6.4729e-04, PNorm = 173.0579, GNorm = 0.1021, lr_0 = 1.0849e-04
Loss = 6.5536e-04, PNorm = 173.0605, GNorm = 0.1122, lr_0 = 1.0841e-04
Loss = 1.1287e-03, PNorm = 173.0611, GNorm = 0.0658, lr_0 = 1.0834e-04
Loss = 6.4763e-04, PNorm = 173.0624, GNorm = 0.0673, lr_0 = 1.0827e-04
Loss = 1.6379e-03, PNorm = 173.0630, GNorm = 0.1263, lr_0 = 1.0819e-04
Loss = 2.1449e-03, PNorm = 173.0643, GNorm = 0.1742, lr_0 = 1.0812e-04
Loss = 5.8595e-04, PNorm = 173.0648, GNorm = 0.0202, lr_0 = 1.0804e-04
Loss = 7.0337e-04, PNorm = 173.0651, GNorm = 0.0478, lr_0 = 1.0797e-04
Loss = 5.3981e-04, PNorm = 173.0658, GNorm = 0.0636, lr_0 = 1.0790e-04
Loss = 3.0367e-03, PNorm = 173.0666, GNorm = 0.1278, lr_0 = 1.0782e-04
Loss = 6.8916e-04, PNorm = 173.0693, GNorm = 0.0806, lr_0 = 1.0775e-04
Loss = 1.1476e-03, PNorm = 173.0719, GNorm = 0.0702, lr_0 = 1.0767e-04
Loss = 7.8675e-04, PNorm = 173.0723, GNorm = 0.1272, lr_0 = 1.0760e-04
Loss = 1.2888e-03, PNorm = 173.0740, GNorm = 0.0514, lr_0 = 1.0753e-04
Loss = 4.7162e-04, PNorm = 173.0749, GNorm = 0.0704, lr_0 = 1.0745e-04
Loss = 1.6429e-03, PNorm = 173.0776, GNorm = 0.0770, lr_0 = 1.0738e-04
Loss = 6.9620e-04, PNorm = 173.0781, GNorm = 0.0739, lr_0 = 1.0731e-04
Loss = 1.5977e-03, PNorm = 173.0785, GNorm = 0.1638, lr_0 = 1.0723e-04
Loss = 1.9071e-03, PNorm = 173.0788, GNorm = 0.0416, lr_0 = 1.0716e-04
Loss = 2.1496e-03, PNorm = 173.0791, GNorm = 0.1229, lr_0 = 1.0709e-04
Loss = 9.0694e-04, PNorm = 173.0801, GNorm = 0.0488, lr_0 = 1.0701e-04
Loss = 3.3178e-03, PNorm = 173.0806, GNorm = 0.0389, lr_0 = 1.0694e-04
Loss = 6.9016e-04, PNorm = 173.0822, GNorm = 0.1230, lr_0 = 1.0687e-04
Loss = 1.0098e-03, PNorm = 173.0842, GNorm = 0.0384, lr_0 = 1.0679e-04
Loss = 1.0600e-03, PNorm = 173.0852, GNorm = 0.0847, lr_0 = 1.0672e-04
Loss = 5.5562e-04, PNorm = 173.0868, GNorm = 0.0663, lr_0 = 1.0665e-04
Loss = 1.4310e-03, PNorm = 173.0879, GNorm = 0.0765, lr_0 = 1.0657e-04
Loss = 7.1975e-04, PNorm = 173.0887, GNorm = 0.1134, lr_0 = 1.0650e-04
Loss = 1.7700e-03, PNorm = 173.0901, GNorm = 0.0946, lr_0 = 1.0643e-04
Loss = 6.8664e-04, PNorm = 173.0905, GNorm = 0.0687, lr_0 = 1.0635e-04
Loss = 8.9017e-04, PNorm = 173.0925, GNorm = 0.0555, lr_0 = 1.0628e-04
Loss = 9.0293e-04, PNorm = 173.0938, GNorm = 0.0682, lr_0 = 1.0621e-04
Loss = 5.8885e-04, PNorm = 173.0948, GNorm = 0.0754, lr_0 = 1.0614e-04
Loss = 2.2655e-03, PNorm = 173.0958, GNorm = 0.0437, lr_0 = 1.0606e-04
Loss = 4.6948e-04, PNorm = 173.0960, GNorm = 0.0527, lr_0 = 1.0599e-04
Loss = 2.1116e-03, PNorm = 173.0965, GNorm = 0.0402, lr_0 = 1.0592e-04
Loss = 1.7249e-03, PNorm = 173.0966, GNorm = 0.2839, lr_0 = 1.0585e-04
Loss = 6.8207e-04, PNorm = 173.0975, GNorm = 0.0296, lr_0 = 1.0577e-04
Loss = 7.1172e-04, PNorm = 173.0985, GNorm = 0.0517, lr_0 = 1.0570e-04
Loss = 8.9763e-04, PNorm = 173.0993, GNorm = 0.1958, lr_0 = 1.0563e-04
Loss = 5.7796e-04, PNorm = 173.1004, GNorm = 0.0597, lr_0 = 1.0556e-04
Loss = 5.6760e-04, PNorm = 173.0998, GNorm = 0.0637, lr_0 = 1.0548e-04
Loss = 4.4554e-04, PNorm = 173.1019, GNorm = 0.0559, lr_0 = 1.0541e-04
Loss = 1.8412e-03, PNorm = 173.1026, GNorm = 0.1768, lr_0 = 1.0534e-04
Loss = 5.1094e-04, PNorm = 173.1028, GNorm = 0.1986, lr_0 = 1.0527e-04
Loss = 7.6092e-04, PNorm = 173.1036, GNorm = 0.1432, lr_0 = 1.0519e-04
Loss = 8.6396e-04, PNorm = 173.1049, GNorm = 0.0805, lr_0 = 1.0512e-04
Loss = 2.8698e-03, PNorm = 173.1078, GNorm = 0.0680, lr_0 = 1.0505e-04
Loss = 1.1544e-03, PNorm = 173.1094, GNorm = 0.0649, lr_0 = 1.0498e-04
Loss = 1.0629e-03, PNorm = 173.1096, GNorm = 0.2504, lr_0 = 1.0491e-04
Loss = 2.2329e-03, PNorm = 173.1098, GNorm = 0.1233, lr_0 = 1.0483e-04
Loss = 1.0537e-03, PNorm = 173.1095, GNorm = 0.1167, lr_0 = 1.0476e-04
Loss = 8.2715e-04, PNorm = 173.1104, GNorm = 0.0572, lr_0 = 1.0469e-04
Loss = 7.9376e-04, PNorm = 173.1113, GNorm = 0.1056, lr_0 = 1.0462e-04
Loss = 5.7642e-04, PNorm = 173.1118, GNorm = 0.0696, lr_0 = 1.0455e-04
Loss = 1.3703e-03, PNorm = 173.1126, GNorm = 0.1141, lr_0 = 1.0448e-04
Loss = 7.6258e-04, PNorm = 173.1135, GNorm = 0.0445, lr_0 = 1.0440e-04
Loss = 9.6487e-04, PNorm = 173.1133, GNorm = 0.0448, lr_0 = 1.0433e-04
Loss = 8.3646e-04, PNorm = 173.1143, GNorm = 0.1867, lr_0 = 1.0426e-04
Loss = 5.0701e-04, PNorm = 173.1146, GNorm = 0.0247, lr_0 = 1.0419e-04
Loss = 9.7186e-04, PNorm = 173.1147, GNorm = 0.1004, lr_0 = 1.0412e-04
Loss = 7.7281e-04, PNorm = 173.1158, GNorm = 0.0532, lr_0 = 1.0405e-04
Loss = 8.5538e-04, PNorm = 173.1166, GNorm = 0.0669, lr_0 = 1.0398e-04
Loss = 1.1759e-03, PNorm = 173.1181, GNorm = 0.0768, lr_0 = 1.0391e-04
Loss = 1.8941e-03, PNorm = 173.1176, GNorm = 0.2711, lr_0 = 1.0383e-04
Loss = 1.2932e-03, PNorm = 173.1191, GNorm = 0.1543, lr_0 = 1.0376e-04
Loss = 6.5190e-04, PNorm = 173.1203, GNorm = 0.0720, lr_0 = 1.0369e-04
Loss = 6.7803e-04, PNorm = 173.1220, GNorm = 0.0377, lr_0 = 1.0362e-04
Loss = 4.9265e-04, PNorm = 173.1238, GNorm = 0.0272, lr_0 = 1.0355e-04
Loss = 4.2684e-04, PNorm = 173.1251, GNorm = 0.0384, lr_0 = 1.0348e-04
Loss = 3.2880e-03, PNorm = 173.1264, GNorm = 0.0649, lr_0 = 1.0341e-04
Loss = 6.1358e-04, PNorm = 173.1269, GNorm = 0.0445, lr_0 = 1.0334e-04
Loss = 8.1854e-04, PNorm = 173.1277, GNorm = 0.0737, lr_0 = 1.0327e-04
Loss = 6.4287e-04, PNorm = 173.1288, GNorm = 0.0437, lr_0 = 1.0320e-04
Loss = 1.6769e-03, PNorm = 173.1303, GNorm = 0.1068, lr_0 = 1.0312e-04
Loss = 1.1600e-03, PNorm = 173.1315, GNorm = 0.1159, lr_0 = 1.0305e-04
Loss = 8.3071e-04, PNorm = 173.1320, GNorm = 0.0491, lr_0 = 1.0298e-04
Loss = 8.1310e-04, PNorm = 173.1337, GNorm = 0.0945, lr_0 = 1.0291e-04
Loss = 2.5111e-03, PNorm = 173.1359, GNorm = 0.1937, lr_0 = 1.0284e-04
Loss = 1.6256e-03, PNorm = 173.1365, GNorm = 0.0823, lr_0 = 1.0277e-04
Loss = 1.6430e-03, PNorm = 173.1376, GNorm = 0.0916, lr_0 = 1.0270e-04
Loss = 1.8835e-03, PNorm = 173.1387, GNorm = 0.2151, lr_0 = 1.0263e-04
Loss = 9.8899e-04, PNorm = 173.1409, GNorm = 0.0590, lr_0 = 1.0256e-04
Loss = 1.2368e-03, PNorm = 173.1436, GNorm = 0.0620, lr_0 = 1.0249e-04
Loss = 6.4347e-04, PNorm = 173.1453, GNorm = 0.0302, lr_0 = 1.0242e-04
Loss = 9.1676e-04, PNorm = 173.1470, GNorm = 0.1102, lr_0 = 1.0235e-04
Loss = 7.4237e-04, PNorm = 173.1467, GNorm = 0.1348, lr_0 = 1.0228e-04
Loss = 1.0610e-03, PNorm = 173.1475, GNorm = 0.1220, lr_0 = 1.0221e-04
Loss = 1.8921e-03, PNorm = 173.1481, GNorm = 0.0935, lr_0 = 1.0214e-04
Loss = 1.3324e-03, PNorm = 173.1495, GNorm = 0.0511, lr_0 = 1.0207e-04
Loss = 5.5028e-04, PNorm = 173.1506, GNorm = 0.0935, lr_0 = 1.0200e-04
Loss = 4.8982e-04, PNorm = 173.1522, GNorm = 0.0654, lr_0 = 1.0193e-04
Loss = 1.3925e-03, PNorm = 173.1531, GNorm = 0.1040, lr_0 = 1.0186e-04
Loss = 1.0288e-03, PNorm = 173.1545, GNorm = 0.0833, lr_0 = 1.0179e-04
Loss = 1.2623e-03, PNorm = 173.1547, GNorm = 0.0716, lr_0 = 1.0172e-04
Loss = 5.6011e-04, PNorm = 173.1558, GNorm = 0.0485, lr_0 = 1.0165e-04
Loss = 6.3745e-04, PNorm = 173.1570, GNorm = 0.1130, lr_0 = 1.0158e-04
Loss = 2.2094e-03, PNorm = 173.1569, GNorm = 0.1694, lr_0 = 1.0151e-04
Loss = 1.1366e-03, PNorm = 173.1575, GNorm = 0.0876, lr_0 = 1.0144e-04
Loss = 5.5024e-04, PNorm = 173.1592, GNorm = 0.1248, lr_0 = 1.0137e-04
Loss = 4.5044e-04, PNorm = 173.1603, GNorm = 0.0916, lr_0 = 1.0130e-04
Loss = 1.7680e-03, PNorm = 173.1613, GNorm = 0.0356, lr_0 = 1.0123e-04
Loss = 1.4113e-03, PNorm = 173.1621, GNorm = 0.3934, lr_0 = 1.0116e-04
Loss = 7.2084e-04, PNorm = 173.1634, GNorm = 0.0917, lr_0 = 1.0110e-04
Loss = 1.9928e-03, PNorm = 173.1648, GNorm = 0.0221, lr_0 = 1.0103e-04
Loss = 2.0216e-03, PNorm = 173.1666, GNorm = 0.1075, lr_0 = 1.0096e-04
Loss = 1.0516e-03, PNorm = 173.1678, GNorm = 0.0390, lr_0 = 1.0089e-04
Loss = 7.4143e-04, PNorm = 173.1690, GNorm = 0.0952, lr_0 = 1.0082e-04
Loss = 1.1427e-03, PNorm = 173.1699, GNorm = 0.0409, lr_0 = 1.0075e-04
Loss = 1.3827e-03, PNorm = 173.1711, GNorm = 0.1461, lr_0 = 1.0068e-04
Loss = 2.5838e-03, PNorm = 173.1712, GNorm = 0.1309, lr_0 = 1.0061e-04
Loss = 6.9454e-04, PNorm = 173.1717, GNorm = 0.0991, lr_0 = 1.0054e-04
Loss = 2.1379e-03, PNorm = 173.1720, GNorm = 0.1743, lr_0 = 1.0047e-04
Loss = 2.4661e-03, PNorm = 173.1737, GNorm = 0.1163, lr_0 = 1.0041e-04
Loss = 1.6045e-03, PNorm = 173.1752, GNorm = 0.0392, lr_0 = 1.0034e-04
Loss = 2.2205e-03, PNorm = 173.1769, GNorm = 0.0460, lr_0 = 1.0027e-04
Loss = 7.5558e-04, PNorm = 173.1781, GNorm = 0.0938, lr_0 = 1.0020e-04
Loss = 2.0231e-03, PNorm = 173.1804, GNorm = 0.1516, lr_0 = 1.0013e-04
Loss = 3.1913e-03, PNorm = 173.1807, GNorm = 0.3017, lr_0 = 1.0006e-04
Loss = 5.0573e-04, PNorm = 173.1832, GNorm = 0.1200, lr_0 = 1.0000e-04
Validation mae = 0.277453
Model 0 best validation mae = 0.277199 on epoch 28
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.274277
Ensemble test mae = 0.274277
Fold 5
Splitting data with seed 5
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=2200, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=2200, out_features=2200, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=2200, out_features=1, bias=True)
  )
)
Number of parameters = 9,352,201
Moving model to cuda
Epoch 0
Loss = 8.4084e-01, PNorm = 65.7665, GNorm = 1.7177, lr_0 = 1.0413e-04
Loss = 7.7313e-01, PNorm = 65.7804, GNorm = 1.8427, lr_0 = 1.0788e-04
Loss = 5.4319e-01, PNorm = 65.7948, GNorm = 2.5376, lr_0 = 1.1163e-04
Loss = 5.0333e-01, PNorm = 65.8072, GNorm = 2.3721, lr_0 = 1.1537e-04
Loss = 4.6995e-01, PNorm = 65.8169, GNorm = 1.9534, lr_0 = 1.1913e-04
Loss = 4.5095e-01, PNorm = 65.8244, GNorm = 3.2348, lr_0 = 1.2287e-04
Loss = 4.7227e-01, PNorm = 65.8340, GNorm = 4.0001, lr_0 = 1.2663e-04
Loss = 4.0019e-01, PNorm = 65.8423, GNorm = 2.2763, lr_0 = 1.3038e-04
Loss = 4.0586e-01, PNorm = 65.8528, GNorm = 1.7295, lr_0 = 1.3413e-04
Loss = 4.0205e-01, PNorm = 65.8620, GNorm = 3.6145, lr_0 = 1.3788e-04
Loss = 3.8387e-01, PNorm = 65.8722, GNorm = 2.3965, lr_0 = 1.4163e-04
Loss = 3.9991e-01, PNorm = 65.8832, GNorm = 2.5596, lr_0 = 1.4537e-04
Loss = 4.0365e-01, PNorm = 65.8917, GNorm = 1.9703, lr_0 = 1.4913e-04
Loss = 4.5380e-01, PNorm = 65.9022, GNorm = 1.8898, lr_0 = 1.5288e-04
Loss = 3.5174e-01, PNorm = 65.9155, GNorm = 2.5419, lr_0 = 1.5662e-04
Loss = 3.3238e-01, PNorm = 65.9261, GNorm = 2.6850, lr_0 = 1.6038e-04
Loss = 3.2866e-01, PNorm = 65.9365, GNorm = 1.6979, lr_0 = 1.6412e-04
Loss = 3.6793e-01, PNorm = 65.9496, GNorm = 1.9375, lr_0 = 1.6788e-04
Loss = 3.0236e-01, PNorm = 65.9619, GNorm = 2.3616, lr_0 = 1.7163e-04
Loss = 3.1868e-01, PNorm = 65.9728, GNorm = 2.3283, lr_0 = 1.7538e-04
Loss = 3.0553e-01, PNorm = 65.9854, GNorm = 1.7448, lr_0 = 1.7913e-04
Loss = 3.5319e-01, PNorm = 65.9965, GNorm = 1.4343, lr_0 = 1.8288e-04
Loss = 3.0771e-01, PNorm = 66.0109, GNorm = 1.5547, lr_0 = 1.8662e-04
Loss = 3.4202e-01, PNorm = 66.0236, GNorm = 2.3442, lr_0 = 1.9038e-04
Loss = 3.7780e-01, PNorm = 66.0360, GNorm = 2.1031, lr_0 = 1.9413e-04
Loss = 3.4666e-01, PNorm = 66.0509, GNorm = 2.1611, lr_0 = 1.9788e-04
Loss = 3.5772e-01, PNorm = 66.0662, GNorm = 2.2644, lr_0 = 2.0163e-04
Loss = 3.3526e-01, PNorm = 66.0851, GNorm = 2.0609, lr_0 = 2.0537e-04
Loss = 3.1626e-01, PNorm = 66.1046, GNorm = 2.1368, lr_0 = 2.0913e-04
Loss = 3.0965e-01, PNorm = 66.1184, GNorm = 2.3767, lr_0 = 2.1288e-04
Loss = 2.7846e-01, PNorm = 66.1346, GNorm = 2.0481, lr_0 = 2.1663e-04
Loss = 3.0074e-01, PNorm = 66.1521, GNorm = 1.6037, lr_0 = 2.2038e-04
Loss = 2.8279e-01, PNorm = 66.1682, GNorm = 1.7001, lr_0 = 2.2412e-04
Loss = 2.9890e-01, PNorm = 66.1855, GNorm = 2.9432, lr_0 = 2.2787e-04
Loss = 3.0954e-01, PNorm = 66.2023, GNorm = 1.7725, lr_0 = 2.3163e-04
Loss = 3.1769e-01, PNorm = 66.2221, GNorm = 1.8972, lr_0 = 2.3538e-04
Loss = 3.0723e-01, PNorm = 66.2431, GNorm = 1.4569, lr_0 = 2.3913e-04
Loss = 3.0523e-01, PNorm = 66.2625, GNorm = 1.1124, lr_0 = 2.4288e-04
Loss = 2.8952e-01, PNorm = 66.2855, GNorm = 1.4374, lr_0 = 2.4662e-04
Loss = 2.9898e-01, PNorm = 66.3046, GNorm = 1.4521, lr_0 = 2.5038e-04
Loss = 2.9304e-01, PNorm = 66.3256, GNorm = 1.4447, lr_0 = 2.5413e-04
Loss = 3.1075e-01, PNorm = 66.3502, GNorm = 1.3749, lr_0 = 2.5788e-04
Loss = 3.5062e-01, PNorm = 66.3710, GNorm = 2.5199, lr_0 = 2.6163e-04
Loss = 3.2394e-01, PNorm = 66.3956, GNorm = 1.4518, lr_0 = 2.6537e-04
Loss = 3.3760e-01, PNorm = 66.4248, GNorm = 1.6789, lr_0 = 2.6912e-04
Loss = 3.2515e-01, PNorm = 66.4525, GNorm = 1.4861, lr_0 = 2.7288e-04
Loss = 3.1283e-01, PNorm = 66.4802, GNorm = 1.3886, lr_0 = 2.7663e-04
Loss = 3.2062e-01, PNorm = 66.5044, GNorm = 1.6365, lr_0 = 2.8038e-04
Loss = 3.1262e-01, PNorm = 66.5281, GNorm = 1.2796, lr_0 = 2.8413e-04
Loss = 3.1859e-01, PNorm = 66.5562, GNorm = 1.5894, lr_0 = 2.8787e-04
Loss = 2.5930e-01, PNorm = 66.5774, GNorm = 1.2483, lr_0 = 2.9163e-04
Loss = 2.5389e-01, PNorm = 66.6001, GNorm = 1.0350, lr_0 = 2.9538e-04
Loss = 2.9705e-01, PNorm = 66.6225, GNorm = 0.9747, lr_0 = 2.9913e-04
Loss = 2.9524e-01, PNorm = 66.6477, GNorm = 1.5587, lr_0 = 3.0288e-04
Loss = 2.8940e-01, PNorm = 66.6744, GNorm = 1.0398, lr_0 = 3.0662e-04
Loss = 3.0688e-01, PNorm = 66.7006, GNorm = 2.4551, lr_0 = 3.1037e-04
Loss = 3.1317e-01, PNorm = 66.7244, GNorm = 0.9001, lr_0 = 3.1413e-04
Loss = 3.5701e-01, PNorm = 66.7573, GNorm = 1.7604, lr_0 = 3.1788e-04
Loss = 2.4856e-01, PNorm = 66.7858, GNorm = 1.1198, lr_0 = 3.2163e-04
Loss = 2.8325e-01, PNorm = 66.8098, GNorm = 1.1548, lr_0 = 3.2538e-04
Loss = 2.3257e-01, PNorm = 66.8412, GNorm = 1.3066, lr_0 = 3.2912e-04
Loss = 2.4985e-01, PNorm = 66.8625, GNorm = 1.0622, lr_0 = 3.3288e-04
Loss = 2.9559e-01, PNorm = 66.8881, GNorm = 1.5625, lr_0 = 3.3663e-04
Loss = 2.7190e-01, PNorm = 66.9178, GNorm = 1.3157, lr_0 = 3.4038e-04
Loss = 2.8522e-01, PNorm = 66.9485, GNorm = 1.2123, lr_0 = 3.4413e-04
Loss = 2.6181e-01, PNorm = 66.9814, GNorm = 1.2954, lr_0 = 3.4787e-04
Loss = 2.9575e-01, PNorm = 67.0136, GNorm = 1.3023, lr_0 = 3.5162e-04
Loss = 2.8339e-01, PNorm = 67.0474, GNorm = 1.1039, lr_0 = 3.5538e-04
Loss = 2.6150e-01, PNorm = 67.0761, GNorm = 0.9501, lr_0 = 3.5913e-04
Loss = 2.8982e-01, PNorm = 67.1106, GNorm = 1.2735, lr_0 = 3.6288e-04
Loss = 2.8491e-01, PNorm = 67.1453, GNorm = 1.0280, lr_0 = 3.6662e-04
Loss = 2.9059e-01, PNorm = 67.1818, GNorm = 1.1569, lr_0 = 3.7037e-04
Loss = 2.7357e-01, PNorm = 67.2213, GNorm = 1.1234, lr_0 = 3.7413e-04
Loss = 2.6383e-01, PNorm = 67.2576, GNorm = 1.1775, lr_0 = 3.7788e-04
Loss = 2.4259e-01, PNorm = 67.2962, GNorm = 1.3671, lr_0 = 3.8163e-04
Loss = 2.6102e-01, PNorm = 67.3276, GNorm = 1.0699, lr_0 = 3.8537e-04
Loss = 2.5902e-01, PNorm = 67.3710, GNorm = 1.0425, lr_0 = 3.8912e-04
Loss = 2.2976e-01, PNorm = 67.4058, GNorm = 0.9097, lr_0 = 3.9287e-04
Loss = 2.8743e-01, PNorm = 67.4427, GNorm = 1.1309, lr_0 = 3.9663e-04
Loss = 2.8424e-01, PNorm = 67.4853, GNorm = 1.4313, lr_0 = 4.0038e-04
Loss = 3.0465e-01, PNorm = 67.5304, GNorm = 1.4289, lr_0 = 4.0413e-04
Loss = 2.8668e-01, PNorm = 67.5797, GNorm = 1.1915, lr_0 = 4.0787e-04
Loss = 2.7179e-01, PNorm = 67.6178, GNorm = 1.2474, lr_0 = 4.1162e-04
Loss = 2.7688e-01, PNorm = 67.6635, GNorm = 1.5863, lr_0 = 4.1537e-04
Loss = 2.8684e-01, PNorm = 67.7072, GNorm = 1.2377, lr_0 = 4.1913e-04
Loss = 2.7460e-01, PNorm = 67.7538, GNorm = 0.9214, lr_0 = 4.2288e-04
Loss = 2.9345e-01, PNorm = 67.7975, GNorm = 1.2248, lr_0 = 4.2662e-04
Loss = 2.6605e-01, PNorm = 67.8408, GNorm = 1.1326, lr_0 = 4.3037e-04
Loss = 2.5513e-01, PNorm = 67.8887, GNorm = 1.1063, lr_0 = 4.3412e-04
Loss = 3.0244e-01, PNorm = 67.9315, GNorm = 1.1658, lr_0 = 4.3788e-04
Loss = 2.6460e-01, PNorm = 67.9828, GNorm = 1.2117, lr_0 = 4.4163e-04
Loss = 2.8111e-01, PNorm = 68.0306, GNorm = 1.7488, lr_0 = 4.4538e-04
Loss = 2.5483e-01, PNorm = 68.0749, GNorm = 0.9324, lr_0 = 4.4912e-04
Loss = 3.1118e-01, PNorm = 68.1243, GNorm = 1.1085, lr_0 = 4.5287e-04
Loss = 2.5878e-01, PNorm = 68.1683, GNorm = 0.8730, lr_0 = 4.5662e-04
Loss = 2.5410e-01, PNorm = 68.2158, GNorm = 1.1771, lr_0 = 4.6038e-04
Loss = 2.5074e-01, PNorm = 68.2623, GNorm = 0.9385, lr_0 = 4.6413e-04
Loss = 2.7890e-01, PNorm = 68.3103, GNorm = 0.8997, lr_0 = 4.6787e-04
Loss = 2.4673e-01, PNorm = 68.3602, GNorm = 0.8718, lr_0 = 4.7162e-04
Loss = 2.9216e-01, PNorm = 68.4067, GNorm = 1.0925, lr_0 = 4.7537e-04
Loss = 2.3474e-01, PNorm = 68.4582, GNorm = 0.7962, lr_0 = 4.7913e-04
Loss = 2.7990e-01, PNorm = 68.5073, GNorm = 1.4727, lr_0 = 4.8288e-04
Loss = 2.5449e-01, PNorm = 68.5596, GNorm = 1.0807, lr_0 = 4.8663e-04
Loss = 2.5519e-01, PNorm = 68.6190, GNorm = 1.4428, lr_0 = 4.9038e-04
Loss = 2.1688e-01, PNorm = 68.6709, GNorm = 0.8846, lr_0 = 4.9412e-04
Loss = 2.3297e-01, PNorm = 68.7281, GNorm = 1.6415, lr_0 = 4.9788e-04
Loss = 2.8567e-01, PNorm = 68.7736, GNorm = 1.0552, lr_0 = 5.0163e-04
Loss = 2.5409e-01, PNorm = 68.8367, GNorm = 1.4308, lr_0 = 5.0538e-04
Loss = 2.3985e-01, PNorm = 68.8879, GNorm = 1.4478, lr_0 = 5.0913e-04
Loss = 2.4340e-01, PNorm = 68.9393, GNorm = 1.2096, lr_0 = 5.1287e-04
Loss = 2.6610e-01, PNorm = 68.9930, GNorm = 1.0583, lr_0 = 5.1663e-04
Loss = 2.5984e-01, PNorm = 69.0412, GNorm = 0.9806, lr_0 = 5.2038e-04
Loss = 2.4713e-01, PNorm = 69.0956, GNorm = 1.1209, lr_0 = 5.2413e-04
Loss = 2.6536e-01, PNorm = 69.1448, GNorm = 0.9693, lr_0 = 5.2788e-04
Loss = 2.6114e-01, PNorm = 69.2044, GNorm = 1.0527, lr_0 = 5.3162e-04
Loss = 2.8652e-01, PNorm = 69.2606, GNorm = 0.8738, lr_0 = 5.3538e-04
Loss = 2.7354e-01, PNorm = 69.3234, GNorm = 1.1150, lr_0 = 5.3912e-04
Loss = 2.1753e-01, PNorm = 69.3841, GNorm = 0.8943, lr_0 = 5.4288e-04
Loss = 2.3438e-01, PNorm = 69.4483, GNorm = 0.9774, lr_0 = 5.4663e-04
Loss = 2.5754e-01, PNorm = 69.5024, GNorm = 0.7733, lr_0 = 5.5038e-04
Validation mae = 0.317295
Epoch 1
Loss = 1.5492e-01, PNorm = 69.5628, GNorm = 0.6793, lr_0 = 5.5413e-04
Loss = 1.8211e-01, PNorm = 69.6195, GNorm = 0.8035, lr_0 = 5.5787e-04
Loss = 1.8206e-01, PNorm = 69.6699, GNorm = 0.9775, lr_0 = 5.6163e-04
Loss = 1.7648e-01, PNorm = 69.7159, GNorm = 1.1215, lr_0 = 5.6538e-04
Loss = 1.8586e-01, PNorm = 69.7704, GNorm = 0.8326, lr_0 = 5.6913e-04
Loss = 1.6898e-01, PNorm = 69.8287, GNorm = 0.7779, lr_0 = 5.7288e-04
Loss = 1.7960e-01, PNorm = 69.8939, GNorm = 0.7704, lr_0 = 5.7662e-04
Loss = 1.8600e-01, PNorm = 69.9507, GNorm = 0.8096, lr_0 = 5.8038e-04
Loss = 1.7164e-01, PNorm = 70.0179, GNorm = 1.0816, lr_0 = 5.8413e-04
Loss = 1.7808e-01, PNorm = 70.0798, GNorm = 1.0065, lr_0 = 5.8788e-04
Loss = 1.6262e-01, PNorm = 70.1510, GNorm = 0.7615, lr_0 = 5.9163e-04
Loss = 2.0224e-01, PNorm = 70.2124, GNorm = 0.8167, lr_0 = 5.9538e-04
Loss = 1.6881e-01, PNorm = 70.2870, GNorm = 0.9122, lr_0 = 5.9913e-04
Loss = 1.8000e-01, PNorm = 70.3588, GNorm = 0.7143, lr_0 = 6.0288e-04
Loss = 1.7223e-01, PNorm = 70.4281, GNorm = 0.8553, lr_0 = 6.0663e-04
Loss = 1.6964e-01, PNorm = 70.4991, GNorm = 0.6999, lr_0 = 6.1038e-04
Loss = 1.8762e-01, PNorm = 70.5781, GNorm = 1.2004, lr_0 = 6.1413e-04
Loss = 1.7496e-01, PNorm = 70.6423, GNorm = 0.9152, lr_0 = 6.1788e-04
Loss = 1.6784e-01, PNorm = 70.7205, GNorm = 0.8631, lr_0 = 6.2163e-04
Loss = 1.8576e-01, PNorm = 70.7939, GNorm = 0.9031, lr_0 = 6.2538e-04
Loss = 1.7966e-01, PNorm = 70.8721, GNorm = 1.0138, lr_0 = 6.2913e-04
Loss = 1.7899e-01, PNorm = 70.9576, GNorm = 0.7705, lr_0 = 6.3288e-04
Loss = 1.8337e-01, PNorm = 71.0347, GNorm = 0.8799, lr_0 = 6.3663e-04
Loss = 1.7687e-01, PNorm = 71.1174, GNorm = 0.7353, lr_0 = 6.4038e-04
Loss = 1.8510e-01, PNorm = 71.2051, GNorm = 0.8162, lr_0 = 6.4413e-04
Loss = 1.8106e-01, PNorm = 71.2964, GNorm = 1.2879, lr_0 = 6.4788e-04
Loss = 1.7874e-01, PNorm = 71.3872, GNorm = 0.7762, lr_0 = 6.5163e-04
Loss = 1.6801e-01, PNorm = 71.4856, GNorm = 0.8769, lr_0 = 6.5538e-04
Loss = 1.7261e-01, PNorm = 71.5587, GNorm = 0.9446, lr_0 = 6.5913e-04
Loss = 1.8858e-01, PNorm = 71.6482, GNorm = 0.8745, lr_0 = 6.6288e-04
Loss = 2.1397e-01, PNorm = 71.7358, GNorm = 1.1521, lr_0 = 6.6663e-04
Loss = 1.7729e-01, PNorm = 71.8374, GNorm = 0.6847, lr_0 = 6.7038e-04
Loss = 1.7901e-01, PNorm = 71.9365, GNorm = 0.8145, lr_0 = 6.7413e-04
Loss = 1.9515e-01, PNorm = 72.0382, GNorm = 0.8891, lr_0 = 6.7788e-04
Loss = 1.8203e-01, PNorm = 72.1444, GNorm = 1.1045, lr_0 = 6.8163e-04
Loss = 1.7615e-01, PNorm = 72.2426, GNorm = 0.6960, lr_0 = 6.8538e-04
Loss = 1.8058e-01, PNorm = 72.3364, GNorm = 0.8620, lr_0 = 6.8913e-04
Loss = 1.9687e-01, PNorm = 72.4332, GNorm = 1.1909, lr_0 = 6.9288e-04
Loss = 2.4510e-01, PNorm = 72.5345, GNorm = 1.2492, lr_0 = 6.9663e-04
Loss = 2.0710e-01, PNorm = 72.6580, GNorm = 1.1978, lr_0 = 7.0038e-04
Loss = 1.8069e-01, PNorm = 72.7733, GNorm = 0.8799, lr_0 = 7.0413e-04
Loss = 1.7822e-01, PNorm = 72.8849, GNorm = 0.8379, lr_0 = 7.0788e-04
Loss = 2.2210e-01, PNorm = 72.9818, GNorm = 1.0231, lr_0 = 7.1163e-04
Loss = 1.9646e-01, PNorm = 73.0959, GNorm = 0.7190, lr_0 = 7.1538e-04
Loss = 1.9140e-01, PNorm = 73.1973, GNorm = 0.8252, lr_0 = 7.1913e-04
Loss = 2.0402e-01, PNorm = 73.3108, GNorm = 0.8478, lr_0 = 7.2288e-04
Loss = 1.9120e-01, PNorm = 73.4112, GNorm = 0.8409, lr_0 = 7.2663e-04
Loss = 2.0164e-01, PNorm = 73.5283, GNorm = 0.7295, lr_0 = 7.3038e-04
Loss = 1.8382e-01, PNorm = 73.6381, GNorm = 0.8092, lr_0 = 7.3413e-04
Loss = 1.9678e-01, PNorm = 73.7472, GNorm = 0.6306, lr_0 = 7.3788e-04
Loss = 1.9768e-01, PNorm = 73.8518, GNorm = 0.7071, lr_0 = 7.4163e-04
Loss = 2.0538e-01, PNorm = 73.9776, GNorm = 0.7828, lr_0 = 7.4538e-04
Loss = 2.2692e-01, PNorm = 74.0874, GNorm = 0.7192, lr_0 = 7.4913e-04
Loss = 1.9002e-01, PNorm = 74.2147, GNorm = 0.5845, lr_0 = 7.5288e-04
Loss = 2.1003e-01, PNorm = 74.3279, GNorm = 0.7157, lr_0 = 7.5663e-04
Loss = 2.1633e-01, PNorm = 74.4443, GNorm = 0.7494, lr_0 = 7.6038e-04
Loss = 1.9888e-01, PNorm = 74.5564, GNorm = 0.7116, lr_0 = 7.6413e-04
Loss = 2.2914e-01, PNorm = 74.6845, GNorm = 1.1637, lr_0 = 7.6788e-04
Loss = 1.9433e-01, PNorm = 74.8157, GNorm = 0.9435, lr_0 = 7.7163e-04
Loss = 1.9407e-01, PNorm = 74.9344, GNorm = 0.8249, lr_0 = 7.7538e-04
Loss = 2.1576e-01, PNorm = 75.0509, GNorm = 0.7633, lr_0 = 7.7913e-04
Loss = 2.1254e-01, PNorm = 75.1651, GNorm = 1.0202, lr_0 = 7.8288e-04
Loss = 2.3037e-01, PNorm = 75.2995, GNorm = 1.5047, lr_0 = 7.8663e-04
Loss = 1.8778e-01, PNorm = 75.4217, GNorm = 1.1134, lr_0 = 7.9038e-04
Loss = 1.9199e-01, PNorm = 75.5344, GNorm = 0.9234, lr_0 = 7.9413e-04
Loss = 1.8329e-01, PNorm = 75.6378, GNorm = 0.8296, lr_0 = 7.9788e-04
Loss = 2.0798e-01, PNorm = 75.7509, GNorm = 1.1781, lr_0 = 8.0163e-04
Loss = 2.1017e-01, PNorm = 75.8603, GNorm = 0.6523, lr_0 = 8.0538e-04
Loss = 2.0689e-01, PNorm = 75.9781, GNorm = 1.1439, lr_0 = 8.0913e-04
Loss = 1.9433e-01, PNorm = 76.0862, GNorm = 0.9157, lr_0 = 8.1288e-04
Loss = 1.7992e-01, PNorm = 76.2054, GNorm = 0.8511, lr_0 = 8.1663e-04
Loss = 2.0022e-01, PNorm = 76.3271, GNorm = 0.8431, lr_0 = 8.2038e-04
Loss = 1.7583e-01, PNorm = 76.4443, GNorm = 0.7169, lr_0 = 8.2413e-04
Loss = 1.8885e-01, PNorm = 76.5694, GNorm = 0.8139, lr_0 = 8.2788e-04
Loss = 1.9810e-01, PNorm = 76.6833, GNorm = 1.0030, lr_0 = 8.3163e-04
Loss = 2.0047e-01, PNorm = 76.8079, GNorm = 1.0050, lr_0 = 8.3538e-04
Loss = 1.9001e-01, PNorm = 76.9271, GNorm = 1.1299, lr_0 = 8.3913e-04
Loss = 2.1799e-01, PNorm = 77.0534, GNorm = 0.8765, lr_0 = 8.4288e-04
Loss = 2.0201e-01, PNorm = 77.1852, GNorm = 1.0098, lr_0 = 8.4663e-04
Loss = 1.8798e-01, PNorm = 77.3038, GNorm = 0.6405, lr_0 = 8.5038e-04
Loss = 2.0709e-01, PNorm = 77.4405, GNorm = 0.7463, lr_0 = 8.5413e-04
Loss = 2.0174e-01, PNorm = 77.5611, GNorm = 0.6699, lr_0 = 8.5788e-04
Loss = 2.3678e-01, PNorm = 77.6825, GNorm = 1.1113, lr_0 = 8.6163e-04
Loss = 2.2277e-01, PNorm = 77.8198, GNorm = 1.0500, lr_0 = 8.6538e-04
Loss = 1.6976e-01, PNorm = 77.9549, GNorm = 0.6570, lr_0 = 8.6913e-04
Loss = 2.0407e-01, PNorm = 78.0823, GNorm = 0.8499, lr_0 = 8.7288e-04
Loss = 2.1315e-01, PNorm = 78.2061, GNorm = 0.7120, lr_0 = 8.7663e-04
Loss = 2.2793e-01, PNorm = 78.3425, GNorm = 0.9047, lr_0 = 8.8038e-04
Loss = 2.4820e-01, PNorm = 78.4684, GNorm = 0.9204, lr_0 = 8.8413e-04
Loss = 2.2564e-01, PNorm = 78.6150, GNorm = 1.2153, lr_0 = 8.8788e-04
Loss = 2.3552e-01, PNorm = 78.7536, GNorm = 0.7074, lr_0 = 8.9163e-04
Loss = 2.1974e-01, PNorm = 78.9015, GNorm = 1.3185, lr_0 = 8.9538e-04
Loss = 2.2778e-01, PNorm = 79.0437, GNorm = 0.7789, lr_0 = 8.9913e-04
Loss = 2.2443e-01, PNorm = 79.1798, GNorm = 1.6672, lr_0 = 9.0288e-04
Loss = 1.8721e-01, PNorm = 79.3149, GNorm = 0.7299, lr_0 = 9.0663e-04
Loss = 1.9895e-01, PNorm = 79.4412, GNorm = 0.7215, lr_0 = 9.1038e-04
Loss = 1.9283e-01, PNorm = 79.5778, GNorm = 1.1369, lr_0 = 9.1413e-04
Loss = 2.1935e-01, PNorm = 79.7162, GNorm = 0.7944, lr_0 = 9.1788e-04
Loss = 1.9997e-01, PNorm = 79.8694, GNorm = 0.7123, lr_0 = 9.2163e-04
Loss = 2.0844e-01, PNorm = 80.0136, GNorm = 0.9228, lr_0 = 9.2538e-04
Loss = 2.4354e-01, PNorm = 80.1590, GNorm = 0.9649, lr_0 = 9.2913e-04
Loss = 2.1108e-01, PNorm = 80.3149, GNorm = 0.8669, lr_0 = 9.3288e-04
Loss = 2.3324e-01, PNorm = 80.4600, GNorm = 0.9992, lr_0 = 9.3663e-04
Loss = 1.8538e-01, PNorm = 80.6095, GNorm = 0.5920, lr_0 = 9.4038e-04
Loss = 2.2760e-01, PNorm = 80.7617, GNorm = 1.0543, lr_0 = 9.4413e-04
Loss = 1.7612e-01, PNorm = 80.9122, GNorm = 0.6926, lr_0 = 9.4788e-04
Loss = 2.0926e-01, PNorm = 81.0558, GNorm = 0.7099, lr_0 = 9.5163e-04
Loss = 2.0893e-01, PNorm = 81.2024, GNorm = 1.0007, lr_0 = 9.5538e-04
Loss = 2.3578e-01, PNorm = 81.3590, GNorm = 0.7416, lr_0 = 9.5913e-04
Loss = 2.0146e-01, PNorm = 81.5095, GNorm = 1.1887, lr_0 = 9.6288e-04
Loss = 2.2663e-01, PNorm = 81.6739, GNorm = 1.4244, lr_0 = 9.6663e-04
Loss = 1.9443e-01, PNorm = 81.8297, GNorm = 0.5543, lr_0 = 9.7038e-04
Loss = 2.1128e-01, PNorm = 81.9861, GNorm = 0.8640, lr_0 = 9.7413e-04
Loss = 1.7711e-01, PNorm = 82.1264, GNorm = 0.7132, lr_0 = 9.7788e-04
Loss = 1.9074e-01, PNorm = 82.2659, GNorm = 0.8846, lr_0 = 9.8163e-04
Loss = 2.0456e-01, PNorm = 82.3858, GNorm = 0.8981, lr_0 = 9.8537e-04
Loss = 2.2691e-01, PNorm = 82.5326, GNorm = 1.1408, lr_0 = 9.8912e-04
Loss = 2.2221e-01, PNorm = 82.6690, GNorm = 1.5500, lr_0 = 9.9288e-04
Loss = 2.1376e-01, PNorm = 82.8353, GNorm = 1.8900, lr_0 = 9.9663e-04
Loss = 2.1780e-01, PNorm = 82.9767, GNorm = 0.8564, lr_0 = 9.9993e-04
Validation mae = 0.310065
Epoch 2
Loss = 1.4676e-01, PNorm = 83.1281, GNorm = 0.7564, lr_0 = 9.9925e-04
Loss = 1.3585e-01, PNorm = 83.2670, GNorm = 1.0396, lr_0 = 9.9856e-04
Loss = 1.2271e-01, PNorm = 83.3757, GNorm = 0.5751, lr_0 = 9.9788e-04
Loss = 1.2265e-01, PNorm = 83.4741, GNorm = 0.6031, lr_0 = 9.9719e-04
Loss = 1.3142e-01, PNorm = 83.5780, GNorm = 0.8205, lr_0 = 9.9651e-04
Loss = 1.3236e-01, PNorm = 83.6668, GNorm = 0.5639, lr_0 = 9.9583e-04
Loss = 1.3339e-01, PNorm = 83.7904, GNorm = 0.7729, lr_0 = 9.9515e-04
Loss = 1.1699e-01, PNorm = 83.8777, GNorm = 0.6769, lr_0 = 9.9446e-04
Loss = 1.2726e-01, PNorm = 83.9873, GNorm = 1.1235, lr_0 = 9.9378e-04
Loss = 1.1386e-01, PNorm = 84.0887, GNorm = 0.5160, lr_0 = 9.9310e-04
Loss = 1.1626e-01, PNorm = 84.1873, GNorm = 0.5407, lr_0 = 9.9242e-04
Loss = 1.3598e-01, PNorm = 84.2882, GNorm = 1.0115, lr_0 = 9.9174e-04
Loss = 1.1276e-01, PNorm = 84.3870, GNorm = 0.6689, lr_0 = 9.9106e-04
Loss = 1.2571e-01, PNorm = 84.4971, GNorm = 0.5937, lr_0 = 9.9038e-04
Loss = 1.2484e-01, PNorm = 84.6157, GNorm = 0.5060, lr_0 = 9.8971e-04
Loss = 1.1569e-01, PNorm = 84.7206, GNorm = 0.7298, lr_0 = 9.8903e-04
Loss = 1.0779e-01, PNorm = 84.8310, GNorm = 0.5914, lr_0 = 9.8835e-04
Loss = 1.2640e-01, PNorm = 84.9345, GNorm = 0.7537, lr_0 = 9.8767e-04
Loss = 1.4076e-01, PNorm = 85.0626, GNorm = 0.6191, lr_0 = 9.8700e-04
Loss = 1.3591e-01, PNorm = 85.1927, GNorm = 0.8405, lr_0 = 9.8632e-04
Loss = 1.3323e-01, PNorm = 85.3073, GNorm = 0.6571, lr_0 = 9.8564e-04
Loss = 1.3153e-01, PNorm = 85.4504, GNorm = 0.6408, lr_0 = 9.8497e-04
Loss = 1.3540e-01, PNorm = 85.5715, GNorm = 0.5538, lr_0 = 9.8429e-04
Loss = 1.3357e-01, PNorm = 85.7069, GNorm = 0.5631, lr_0 = 9.8362e-04
Loss = 1.4286e-01, PNorm = 85.8354, GNorm = 0.8818, lr_0 = 9.8295e-04
Loss = 1.2102e-01, PNorm = 85.9623, GNorm = 0.7428, lr_0 = 9.8227e-04
Loss = 1.2878e-01, PNorm = 86.0879, GNorm = 0.8162, lr_0 = 9.8160e-04
Loss = 1.2470e-01, PNorm = 86.2082, GNorm = 0.5045, lr_0 = 9.8093e-04
Loss = 1.2272e-01, PNorm = 86.3274, GNorm = 0.5672, lr_0 = 9.8026e-04
Loss = 1.6316e-01, PNorm = 86.4473, GNorm = 0.5425, lr_0 = 9.7958e-04
Loss = 1.2863e-01, PNorm = 86.5880, GNorm = 0.4679, lr_0 = 9.7891e-04
Loss = 1.2713e-01, PNorm = 86.7039, GNorm = 0.6205, lr_0 = 9.7824e-04
Loss = 1.1920e-01, PNorm = 86.8312, GNorm = 0.6336, lr_0 = 9.7757e-04
Loss = 1.2984e-01, PNorm = 86.9464, GNorm = 0.6280, lr_0 = 9.7690e-04
Loss = 1.3609e-01, PNorm = 87.0678, GNorm = 0.4168, lr_0 = 9.7623e-04
Loss = 1.3868e-01, PNorm = 87.1919, GNorm = 0.6418, lr_0 = 9.7556e-04
Loss = 1.4549e-01, PNorm = 87.3100, GNorm = 0.7683, lr_0 = 9.7490e-04
Loss = 1.3087e-01, PNorm = 87.4433, GNorm = 0.6055, lr_0 = 9.7423e-04
Loss = 1.3302e-01, PNorm = 87.5537, GNorm = 0.4347, lr_0 = 9.7356e-04
Loss = 1.3126e-01, PNorm = 87.6859, GNorm = 0.4319, lr_0 = 9.7289e-04
Loss = 1.2570e-01, PNorm = 87.8032, GNorm = 0.8285, lr_0 = 9.7223e-04
Loss = 1.3379e-01, PNorm = 87.9370, GNorm = 0.6934, lr_0 = 9.7156e-04
Loss = 1.5288e-01, PNorm = 88.0587, GNorm = 0.5261, lr_0 = 9.7090e-04
Loss = 1.5076e-01, PNorm = 88.2011, GNorm = 0.8261, lr_0 = 9.7023e-04
Loss = 1.2852e-01, PNorm = 88.3306, GNorm = 0.7050, lr_0 = 9.6957e-04
Loss = 1.1461e-01, PNorm = 88.4674, GNorm = 0.5605, lr_0 = 9.6890e-04
Loss = 1.4303e-01, PNorm = 88.5802, GNorm = 0.6286, lr_0 = 9.6824e-04
Loss = 1.3029e-01, PNorm = 88.7156, GNorm = 0.5387, lr_0 = 9.6757e-04
Loss = 1.1912e-01, PNorm = 88.8434, GNorm = 0.9325, lr_0 = 9.6691e-04
Loss = 1.2125e-01, PNorm = 88.9714, GNorm = 0.5752, lr_0 = 9.6625e-04
Loss = 1.4031e-01, PNorm = 89.0944, GNorm = 0.8555, lr_0 = 9.6559e-04
Loss = 1.4086e-01, PNorm = 89.2200, GNorm = 0.4392, lr_0 = 9.6493e-04
Loss = 1.4215e-01, PNorm = 89.3593, GNorm = 0.7693, lr_0 = 9.6427e-04
Loss = 1.3334e-01, PNorm = 89.4921, GNorm = 0.5912, lr_0 = 9.6360e-04
Loss = 1.5104e-01, PNorm = 89.6229, GNorm = 0.6557, lr_0 = 9.6294e-04
Loss = 1.2839e-01, PNorm = 89.7583, GNorm = 1.1220, lr_0 = 9.6228e-04
Loss = 1.3265e-01, PNorm = 89.8884, GNorm = 0.7533, lr_0 = 9.6163e-04
Loss = 1.4289e-01, PNorm = 90.0196, GNorm = 0.9895, lr_0 = 9.6097e-04
Loss = 1.4294e-01, PNorm = 90.1492, GNorm = 0.8535, lr_0 = 9.6031e-04
Loss = 1.4355e-01, PNorm = 90.2783, GNorm = 0.4216, lr_0 = 9.5965e-04
Loss = 1.3727e-01, PNorm = 90.4080, GNorm = 0.6253, lr_0 = 9.5899e-04
Loss = 1.5227e-01, PNorm = 90.5413, GNorm = 0.7730, lr_0 = 9.5834e-04
Loss = 1.3475e-01, PNorm = 90.6706, GNorm = 0.6157, lr_0 = 9.5768e-04
Loss = 1.3241e-01, PNorm = 90.8055, GNorm = 0.5957, lr_0 = 9.5702e-04
Loss = 1.4804e-01, PNorm = 90.9275, GNorm = 0.9065, lr_0 = 9.5637e-04
Loss = 1.4420e-01, PNorm = 91.0773, GNorm = 0.7498, lr_0 = 9.5571e-04
Loss = 1.4135e-01, PNorm = 91.2082, GNorm = 0.9884, lr_0 = 9.5506e-04
Loss = 1.3141e-01, PNorm = 91.3425, GNorm = 0.6823, lr_0 = 9.5440e-04
Loss = 1.2918e-01, PNorm = 91.4674, GNorm = 0.4937, lr_0 = 9.5375e-04
Loss = 1.4777e-01, PNorm = 91.5849, GNorm = 0.8161, lr_0 = 9.5310e-04
Loss = 1.5347e-01, PNorm = 91.7188, GNorm = 0.6040, lr_0 = 9.5244e-04
Loss = 1.5234e-01, PNorm = 91.8402, GNorm = 1.2322, lr_0 = 9.5179e-04
Loss = 1.5507e-01, PNorm = 91.9772, GNorm = 1.1182, lr_0 = 9.5114e-04
Loss = 1.2799e-01, PNorm = 92.1124, GNorm = 0.7708, lr_0 = 9.5049e-04
Loss = 1.4696e-01, PNorm = 92.2449, GNorm = 1.0320, lr_0 = 9.4984e-04
Loss = 1.5100e-01, PNorm = 92.3633, GNorm = 0.6234, lr_0 = 9.4919e-04
Loss = 1.4932e-01, PNorm = 92.5048, GNorm = 0.6799, lr_0 = 9.4854e-04
Loss = 1.3712e-01, PNorm = 92.6289, GNorm = 0.4871, lr_0 = 9.4789e-04
Loss = 1.1795e-01, PNorm = 92.7530, GNorm = 0.6275, lr_0 = 9.4724e-04
Loss = 1.5133e-01, PNorm = 92.8652, GNorm = 0.4818, lr_0 = 9.4659e-04
Loss = 1.4579e-01, PNorm = 92.9912, GNorm = 1.2618, lr_0 = 9.4594e-04
Loss = 1.5432e-01, PNorm = 93.1258, GNorm = 0.7497, lr_0 = 9.4529e-04
Loss = 1.4955e-01, PNorm = 93.2605, GNorm = 0.5592, lr_0 = 9.4464e-04
Loss = 1.4066e-01, PNorm = 93.4072, GNorm = 0.6653, lr_0 = 9.4400e-04
Loss = 1.3526e-01, PNorm = 93.5366, GNorm = 0.8111, lr_0 = 9.4335e-04
Loss = 1.4099e-01, PNorm = 93.6809, GNorm = 1.0851, lr_0 = 9.4270e-04
Loss = 1.2785e-01, PNorm = 93.8123, GNorm = 0.5384, lr_0 = 9.4206e-04
Loss = 1.3423e-01, PNorm = 93.9372, GNorm = 0.9237, lr_0 = 9.4141e-04
Loss = 1.3077e-01, PNorm = 94.0588, GNorm = 0.6483, lr_0 = 9.4077e-04
Loss = 1.3143e-01, PNorm = 94.1985, GNorm = 0.5518, lr_0 = 9.4012e-04
Loss = 1.3652e-01, PNorm = 94.3191, GNorm = 0.9604, lr_0 = 9.3948e-04
Loss = 1.3943e-01, PNorm = 94.4488, GNorm = 1.1288, lr_0 = 9.3884e-04
Loss = 1.5785e-01, PNorm = 94.5688, GNorm = 0.5333, lr_0 = 9.3819e-04
Loss = 1.5719e-01, PNorm = 94.7081, GNorm = 0.9343, lr_0 = 9.3755e-04
Loss = 1.5627e-01, PNorm = 94.8482, GNorm = 1.0720, lr_0 = 9.3691e-04
Loss = 1.4572e-01, PNorm = 94.9929, GNorm = 0.8800, lr_0 = 9.3627e-04
Loss = 1.3189e-01, PNorm = 95.1209, GNorm = 0.5331, lr_0 = 9.3562e-04
Loss = 1.4022e-01, PNorm = 95.2620, GNorm = 1.4908, lr_0 = 9.3498e-04
Loss = 1.6891e-01, PNorm = 95.3819, GNorm = 0.7741, lr_0 = 9.3434e-04
Loss = 1.3858e-01, PNorm = 95.5256, GNorm = 0.6573, lr_0 = 9.3370e-04
Loss = 1.5472e-01, PNorm = 95.6436, GNorm = 1.6106, lr_0 = 9.3306e-04
Loss = 1.4215e-01, PNorm = 95.7655, GNorm = 0.5352, lr_0 = 9.3242e-04
Loss = 1.1993e-01, PNorm = 95.8822, GNorm = 0.5455, lr_0 = 9.3178e-04
Loss = 1.4649e-01, PNorm = 95.9944, GNorm = 0.7232, lr_0 = 9.3115e-04
Loss = 1.5024e-01, PNorm = 96.1151, GNorm = 0.7437, lr_0 = 9.3051e-04
Loss = 1.3811e-01, PNorm = 96.2283, GNorm = 0.5545, lr_0 = 9.2987e-04
Loss = 1.6020e-01, PNorm = 96.3614, GNorm = 0.6224, lr_0 = 9.2923e-04
Loss = 1.4032e-01, PNorm = 96.4941, GNorm = 0.5090, lr_0 = 9.2860e-04
Loss = 1.5612e-01, PNorm = 96.6107, GNorm = 0.6549, lr_0 = 9.2796e-04
Loss = 1.4758e-01, PNorm = 96.7315, GNorm = 0.7017, lr_0 = 9.2733e-04
Loss = 1.2843e-01, PNorm = 96.8619, GNorm = 0.5413, lr_0 = 9.2669e-04
Loss = 1.5579e-01, PNorm = 96.9911, GNorm = 0.5309, lr_0 = 9.2606e-04
Loss = 1.4813e-01, PNorm = 97.1236, GNorm = 0.7995, lr_0 = 9.2542e-04
Loss = 1.4815e-01, PNorm = 97.2597, GNorm = 1.0357, lr_0 = 9.2479e-04
Loss = 1.6191e-01, PNorm = 97.3841, GNorm = 0.8744, lr_0 = 9.2415e-04
Loss = 1.5461e-01, PNorm = 97.5216, GNorm = 1.0307, lr_0 = 9.2352e-04
Loss = 1.3387e-01, PNorm = 97.6461, GNorm = 1.1610, lr_0 = 9.2289e-04
Loss = 1.5843e-01, PNorm = 97.7865, GNorm = 0.7421, lr_0 = 9.2226e-04
Loss = 1.5778e-01, PNorm = 97.9045, GNorm = 0.5989, lr_0 = 9.2162e-04
Loss = 1.3686e-01, PNorm = 98.0427, GNorm = 1.2408, lr_0 = 9.2099e-04
Validation mae = 0.295455
Epoch 3
Loss = 8.1172e-02, PNorm = 98.1479, GNorm = 0.6174, lr_0 = 9.2036e-04
Loss = 7.9903e-02, PNorm = 98.2392, GNorm = 0.4487, lr_0 = 9.1973e-04
Loss = 9.5472e-02, PNorm = 98.3142, GNorm = 1.3964, lr_0 = 9.1910e-04
Loss = 8.7329e-02, PNorm = 98.3979, GNorm = 0.7194, lr_0 = 9.1847e-04
Loss = 8.4146e-02, PNorm = 98.4729, GNorm = 0.6985, lr_0 = 9.1784e-04
Loss = 6.8207e-02, PNorm = 98.5514, GNorm = 0.3796, lr_0 = 9.1721e-04
Loss = 8.0048e-02, PNorm = 98.6193, GNorm = 0.5123, lr_0 = 9.1658e-04
Loss = 8.3157e-02, PNorm = 98.7084, GNorm = 0.3776, lr_0 = 9.1596e-04
Loss = 7.9070e-02, PNorm = 98.7874, GNorm = 0.5505, lr_0 = 9.1533e-04
Loss = 8.6893e-02, PNorm = 98.8700, GNorm = 0.6848, lr_0 = 9.1470e-04
Loss = 8.0856e-02, PNorm = 98.9379, GNorm = 0.3286, lr_0 = 9.1408e-04
Loss = 7.1631e-02, PNorm = 99.0142, GNorm = 0.4634, lr_0 = 9.1345e-04
Loss = 7.0274e-02, PNorm = 99.0890, GNorm = 0.7366, lr_0 = 9.1282e-04
Loss = 8.0611e-02, PNorm = 99.1626, GNorm = 0.4821, lr_0 = 9.1220e-04
Loss = 8.2407e-02, PNorm = 99.2388, GNorm = 0.4720, lr_0 = 9.1157e-04
Loss = 7.5929e-02, PNorm = 99.3295, GNorm = 0.6670, lr_0 = 9.1095e-04
Loss = 8.6482e-02, PNorm = 99.4008, GNorm = 1.0306, lr_0 = 9.1032e-04
Loss = 7.7316e-02, PNorm = 99.4794, GNorm = 0.5473, lr_0 = 9.0970e-04
Loss = 7.5987e-02, PNorm = 99.5529, GNorm = 0.7114, lr_0 = 9.0908e-04
Loss = 8.1221e-02, PNorm = 99.6273, GNorm = 0.4038, lr_0 = 9.0846e-04
Loss = 7.8609e-02, PNorm = 99.7094, GNorm = 0.4248, lr_0 = 9.0783e-04
Loss = 7.3032e-02, PNorm = 99.7897, GNorm = 0.4326, lr_0 = 9.0721e-04
Loss = 8.4473e-02, PNorm = 99.8659, GNorm = 0.4734, lr_0 = 9.0659e-04
Loss = 7.9757e-02, PNorm = 99.9455, GNorm = 0.4716, lr_0 = 9.0597e-04
Loss = 7.3900e-02, PNorm = 100.0363, GNorm = 0.4178, lr_0 = 9.0535e-04
Loss = 8.4026e-02, PNorm = 100.1263, GNorm = 0.7138, lr_0 = 9.0473e-04
Loss = 8.4541e-02, PNorm = 100.2104, GNorm = 0.4624, lr_0 = 9.0411e-04
Loss = 8.0232e-02, PNorm = 100.3035, GNorm = 0.4906, lr_0 = 9.0349e-04
Loss = 7.7237e-02, PNorm = 100.3954, GNorm = 0.4461, lr_0 = 9.0287e-04
Loss = 8.3153e-02, PNorm = 100.4927, GNorm = 0.5992, lr_0 = 9.0225e-04
Loss = 7.8194e-02, PNorm = 100.5726, GNorm = 0.8941, lr_0 = 9.0163e-04
Loss = 8.9899e-02, PNorm = 100.6696, GNorm = 0.4777, lr_0 = 9.0102e-04
Loss = 7.9526e-02, PNorm = 100.7534, GNorm = 0.3531, lr_0 = 9.0040e-04
Loss = 8.5579e-02, PNorm = 100.8510, GNorm = 0.4846, lr_0 = 8.9978e-04
Loss = 7.6678e-02, PNorm = 100.9306, GNorm = 0.6817, lr_0 = 8.9916e-04
Loss = 9.0034e-02, PNorm = 101.0269, GNorm = 0.8939, lr_0 = 8.9855e-04
Loss = 8.0966e-02, PNorm = 101.1255, GNorm = 0.3653, lr_0 = 8.9793e-04
Loss = 8.6392e-02, PNorm = 101.2248, GNorm = 0.3805, lr_0 = 8.9732e-04
Loss = 6.9782e-02, PNorm = 101.3125, GNorm = 0.5011, lr_0 = 8.9670e-04
Loss = 8.6068e-02, PNorm = 101.4084, GNorm = 1.0208, lr_0 = 8.9609e-04
Loss = 7.9200e-02, PNorm = 101.4932, GNorm = 0.6220, lr_0 = 8.9548e-04
Loss = 6.9126e-02, PNorm = 101.5906, GNorm = 0.6179, lr_0 = 8.9486e-04
Loss = 6.6711e-02, PNorm = 101.6699, GNorm = 0.3992, lr_0 = 8.9425e-04
Loss = 8.6336e-02, PNorm = 101.7518, GNorm = 0.3892, lr_0 = 8.9364e-04
Loss = 7.4358e-02, PNorm = 101.8419, GNorm = 0.7786, lr_0 = 8.9302e-04
Loss = 9.1813e-02, PNorm = 101.9326, GNorm = 0.5420, lr_0 = 8.9241e-04
Loss = 8.9173e-02, PNorm = 102.0406, GNorm = 0.5247, lr_0 = 8.9180e-04
Loss = 9.8821e-02, PNorm = 102.1409, GNorm = 0.7916, lr_0 = 8.9119e-04
Loss = 8.4365e-02, PNorm = 102.2404, GNorm = 0.4057, lr_0 = 8.9058e-04
Loss = 9.2768e-02, PNorm = 102.3429, GNorm = 0.5623, lr_0 = 8.8997e-04
Loss = 8.1038e-02, PNorm = 102.4409, GNorm = 0.3683, lr_0 = 8.8936e-04
Loss = 8.9338e-02, PNorm = 102.5276, GNorm = 0.5933, lr_0 = 8.8875e-04
Loss = 8.5641e-02, PNorm = 102.6199, GNorm = 0.6887, lr_0 = 8.8814e-04
Loss = 7.9893e-02, PNorm = 102.7064, GNorm = 0.9217, lr_0 = 8.8753e-04
Loss = 9.3519e-02, PNorm = 102.8039, GNorm = 0.3965, lr_0 = 8.8693e-04
Loss = 9.1649e-02, PNorm = 102.9023, GNorm = 0.5876, lr_0 = 8.8632e-04
Loss = 8.4187e-02, PNorm = 102.9910, GNorm = 0.3813, lr_0 = 8.8571e-04
Loss = 9.6657e-02, PNorm = 103.0817, GNorm = 0.5300, lr_0 = 8.8510e-04
Loss = 7.3702e-02, PNorm = 103.1685, GNorm = 0.6692, lr_0 = 8.8450e-04
Loss = 8.8783e-02, PNorm = 103.2570, GNorm = 0.5399, lr_0 = 8.8389e-04
Loss = 7.7870e-02, PNorm = 103.3368, GNorm = 0.3951, lr_0 = 8.8329e-04
Loss = 9.1962e-02, PNorm = 103.4326, GNorm = 1.2043, lr_0 = 8.8268e-04
Loss = 9.1749e-02, PNorm = 103.5208, GNorm = 0.7665, lr_0 = 8.8208e-04
Loss = 9.7104e-02, PNorm = 103.6279, GNorm = 0.5636, lr_0 = 8.8147e-04
Loss = 8.2842e-02, PNorm = 103.7206, GNorm = 0.4269, lr_0 = 8.8087e-04
Loss = 9.9037e-02, PNorm = 103.8211, GNorm = 0.4386, lr_0 = 8.8026e-04
Loss = 7.9959e-02, PNorm = 103.9179, GNorm = 0.5603, lr_0 = 8.7966e-04
Loss = 9.2223e-02, PNorm = 104.0127, GNorm = 1.0902, lr_0 = 8.7906e-04
Loss = 9.2257e-02, PNorm = 104.1080, GNorm = 0.3450, lr_0 = 8.7846e-04
Loss = 9.2709e-02, PNorm = 104.2078, GNorm = 0.5424, lr_0 = 8.7785e-04
Loss = 9.5495e-02, PNorm = 104.3189, GNorm = 0.5775, lr_0 = 8.7725e-04
Loss = 9.5102e-02, PNorm = 104.4266, GNorm = 0.7198, lr_0 = 8.7665e-04
Loss = 8.5595e-02, PNorm = 104.5312, GNorm = 0.6243, lr_0 = 8.7605e-04
Loss = 9.4032e-02, PNorm = 104.6315, GNorm = 0.9418, lr_0 = 8.7545e-04
Loss = 7.7887e-02, PNorm = 104.7175, GNorm = 0.4392, lr_0 = 8.7485e-04
Loss = 7.1063e-02, PNorm = 104.8107, GNorm = 0.4598, lr_0 = 8.7425e-04
Loss = 9.0922e-02, PNorm = 104.8997, GNorm = 0.5793, lr_0 = 8.7365e-04
Loss = 8.2809e-02, PNorm = 104.9923, GNorm = 0.7322, lr_0 = 8.7306e-04
Loss = 8.1303e-02, PNorm = 105.0919, GNorm = 0.3117, lr_0 = 8.7246e-04
Loss = 8.1754e-02, PNorm = 105.1890, GNorm = 0.8395, lr_0 = 8.7186e-04
Loss = 7.4401e-02, PNorm = 105.2851, GNorm = 0.4164, lr_0 = 8.7126e-04
Loss = 8.4398e-02, PNorm = 105.3652, GNorm = 0.6790, lr_0 = 8.7067e-04
Loss = 9.9862e-02, PNorm = 105.4593, GNorm = 0.7796, lr_0 = 8.7007e-04
Loss = 9.2803e-02, PNorm = 105.5684, GNorm = 0.7080, lr_0 = 8.6947e-04
Loss = 8.8929e-02, PNorm = 105.6671, GNorm = 0.3555, lr_0 = 8.6888e-04
Loss = 8.7853e-02, PNorm = 105.7760, GNorm = 0.5747, lr_0 = 8.6828e-04
Loss = 9.7016e-02, PNorm = 105.8769, GNorm = 0.7096, lr_0 = 8.6769e-04
Loss = 7.5815e-02, PNorm = 105.9808, GNorm = 0.5248, lr_0 = 8.6709e-04
Loss = 1.0659e-01, PNorm = 106.0713, GNorm = 0.5711, lr_0 = 8.6650e-04
Loss = 8.9935e-02, PNorm = 106.1850, GNorm = 0.6258, lr_0 = 8.6590e-04
Loss = 1.0525e-01, PNorm = 106.2973, GNorm = 0.4923, lr_0 = 8.6531e-04
Loss = 8.3731e-02, PNorm = 106.4121, GNorm = 0.6393, lr_0 = 8.6472e-04
Loss = 8.3578e-02, PNorm = 106.5085, GNorm = 0.4504, lr_0 = 8.6413e-04
Loss = 1.0860e-01, PNorm = 106.6222, GNorm = 0.6368, lr_0 = 8.6353e-04
Loss = 8.7852e-02, PNorm = 106.7382, GNorm = 0.7999, lr_0 = 8.6294e-04
Loss = 1.0568e-01, PNorm = 106.8427, GNorm = 0.8456, lr_0 = 8.6235e-04
Loss = 9.0204e-02, PNorm = 106.9541, GNorm = 0.6201, lr_0 = 8.6176e-04
Loss = 9.0039e-02, PNorm = 107.0631, GNorm = 0.6341, lr_0 = 8.6117e-04
Loss = 9.1796e-02, PNorm = 107.1850, GNorm = 0.6323, lr_0 = 8.6058e-04
Loss = 9.0627e-02, PNorm = 107.2817, GNorm = 0.7539, lr_0 = 8.5999e-04
Loss = 9.4971e-02, PNorm = 107.3973, GNorm = 0.6811, lr_0 = 8.5940e-04
Loss = 9.6092e-02, PNorm = 107.5062, GNorm = 0.4240, lr_0 = 8.5881e-04
Loss = 9.5636e-02, PNorm = 107.6179, GNorm = 0.5358, lr_0 = 8.5823e-04
Loss = 9.3852e-02, PNorm = 107.7247, GNorm = 0.8567, lr_0 = 8.5764e-04
Loss = 9.4134e-02, PNorm = 107.8357, GNorm = 0.7694, lr_0 = 8.5705e-04
Loss = 7.9313e-02, PNorm = 107.9382, GNorm = 0.5003, lr_0 = 8.5646e-04
Loss = 8.6422e-02, PNorm = 108.0446, GNorm = 0.6209, lr_0 = 8.5588e-04
Loss = 1.0307e-01, PNorm = 108.1493, GNorm = 0.8163, lr_0 = 8.5529e-04
Loss = 9.0926e-02, PNorm = 108.2513, GNorm = 0.7665, lr_0 = 8.5470e-04
Loss = 9.8090e-02, PNorm = 108.3595, GNorm = 0.6664, lr_0 = 8.5412e-04
Loss = 9.1612e-02, PNorm = 108.4590, GNorm = 0.5852, lr_0 = 8.5353e-04
Loss = 8.3368e-02, PNorm = 108.5595, GNorm = 0.4619, lr_0 = 8.5295e-04
Loss = 9.5104e-02, PNorm = 108.6510, GNorm = 0.5940, lr_0 = 8.5236e-04
Loss = 1.1244e-01, PNorm = 108.7650, GNorm = 0.7352, lr_0 = 8.5178e-04
Loss = 8.9176e-02, PNorm = 108.8850, GNorm = 0.4058, lr_0 = 8.5120e-04
Loss = 1.0005e-01, PNorm = 108.9902, GNorm = 0.4013, lr_0 = 8.5061e-04
Loss = 9.3173e-02, PNorm = 109.1108, GNorm = 0.4060, lr_0 = 8.5003e-04
Loss = 1.0058e-01, PNorm = 109.2235, GNorm = 0.4991, lr_0 = 8.4945e-04
Loss = 9.0731e-02, PNorm = 109.3465, GNorm = 0.5145, lr_0 = 8.4887e-04
Loss = 9.1972e-02, PNorm = 109.4478, GNorm = 0.5750, lr_0 = 8.4828e-04
Validation mae = 0.292777
Epoch 4
Loss = 5.7985e-02, PNorm = 109.5515, GNorm = 0.4523, lr_0 = 8.4770e-04
Loss = 6.4356e-02, PNorm = 109.6227, GNorm = 0.5102, lr_0 = 8.4712e-04
Loss = 5.7954e-02, PNorm = 109.7041, GNorm = 0.4382, lr_0 = 8.4654e-04
Loss = 6.0092e-02, PNorm = 109.7765, GNorm = 0.3102, lr_0 = 8.4596e-04
Loss = 6.8154e-02, PNorm = 109.8524, GNorm = 0.4023, lr_0 = 8.4538e-04
Loss = 4.8354e-02, PNorm = 109.9121, GNorm = 0.5623, lr_0 = 8.4480e-04
Loss = 4.8242e-02, PNorm = 109.9730, GNorm = 0.7353, lr_0 = 8.4423e-04
Loss = 6.0481e-02, PNorm = 110.0269, GNorm = 0.9326, lr_0 = 8.4365e-04
Loss = 5.5489e-02, PNorm = 110.0940, GNorm = 0.4991, lr_0 = 8.4307e-04
Loss = 5.4849e-02, PNorm = 110.1637, GNorm = 0.3025, lr_0 = 8.4249e-04
Loss = 5.4558e-02, PNorm = 110.2357, GNorm = 0.5726, lr_0 = 8.4191e-04
Loss = 5.5333e-02, PNorm = 110.3049, GNorm = 0.4296, lr_0 = 8.4134e-04
Loss = 5.1860e-02, PNorm = 110.3775, GNorm = 0.8650, lr_0 = 8.4076e-04
Loss = 6.0730e-02, PNorm = 110.4389, GNorm = 0.7309, lr_0 = 8.4019e-04
Loss = 5.5265e-02, PNorm = 110.5177, GNorm = 0.4845, lr_0 = 8.3961e-04
Loss = 5.3403e-02, PNorm = 110.5840, GNorm = 0.3885, lr_0 = 8.3903e-04
Loss = 6.1551e-02, PNorm = 110.6528, GNorm = 0.4404, lr_0 = 8.3846e-04
Loss = 5.5694e-02, PNorm = 110.7232, GNorm = 0.6017, lr_0 = 8.3789e-04
Loss = 5.7054e-02, PNorm = 110.7989, GNorm = 0.8256, lr_0 = 8.3731e-04
Loss = 5.1900e-02, PNorm = 110.8696, GNorm = 0.3125, lr_0 = 8.3674e-04
Loss = 5.2379e-02, PNorm = 110.9322, GNorm = 0.3783, lr_0 = 8.3616e-04
Loss = 6.6429e-02, PNorm = 110.9902, GNorm = 0.4585, lr_0 = 8.3559e-04
Loss = 4.7092e-02, PNorm = 111.0547, GNorm = 0.3801, lr_0 = 8.3502e-04
Loss = 5.1707e-02, PNorm = 111.1176, GNorm = 0.4154, lr_0 = 8.3445e-04
Loss = 5.8691e-02, PNorm = 111.1725, GNorm = 0.3428, lr_0 = 8.3388e-04
Loss = 5.6042e-02, PNorm = 111.2468, GNorm = 1.0337, lr_0 = 8.3330e-04
Loss = 6.0605e-02, PNorm = 111.3196, GNorm = 0.6781, lr_0 = 8.3273e-04
Loss = 5.7448e-02, PNorm = 111.4024, GNorm = 0.2227, lr_0 = 8.3216e-04
Loss = 5.8081e-02, PNorm = 111.4856, GNorm = 0.4243, lr_0 = 8.3159e-04
Loss = 6.2926e-02, PNorm = 111.5678, GNorm = 0.4310, lr_0 = 8.3102e-04
Loss = 6.1038e-02, PNorm = 111.6601, GNorm = 0.4432, lr_0 = 8.3045e-04
Loss = 4.9719e-02, PNorm = 111.7426, GNorm = 0.2897, lr_0 = 8.2988e-04
Loss = 5.6783e-02, PNorm = 111.8162, GNorm = 0.4380, lr_0 = 8.2932e-04
Loss = 5.5968e-02, PNorm = 111.8968, GNorm = 0.7705, lr_0 = 8.2875e-04
Loss = 6.4733e-02, PNorm = 111.9875, GNorm = 0.7115, lr_0 = 8.2818e-04
Loss = 6.2286e-02, PNorm = 112.0744, GNorm = 0.3493, lr_0 = 8.2761e-04
Loss = 5.1027e-02, PNorm = 112.1615, GNorm = 0.4467, lr_0 = 8.2705e-04
Loss = 4.7766e-02, PNorm = 112.2329, GNorm = 0.3828, lr_0 = 8.2648e-04
Loss = 5.5685e-02, PNorm = 112.3101, GNorm = 0.7196, lr_0 = 8.2591e-04
Loss = 6.1578e-02, PNorm = 112.3791, GNorm = 0.5592, lr_0 = 8.2535e-04
Loss = 5.5607e-02, PNorm = 112.4547, GNorm = 0.2858, lr_0 = 8.2478e-04
Loss = 6.1392e-02, PNorm = 112.5259, GNorm = 0.7515, lr_0 = 8.2422e-04
Loss = 6.3728e-02, PNorm = 112.6052, GNorm = 0.3873, lr_0 = 8.2365e-04
Loss = 6.5060e-02, PNorm = 112.6853, GNorm = 1.3251, lr_0 = 8.2309e-04
Loss = 5.6894e-02, PNorm = 112.7788, GNorm = 0.3195, lr_0 = 8.2252e-04
Loss = 5.5437e-02, PNorm = 112.8505, GNorm = 0.6033, lr_0 = 8.2196e-04
Loss = 6.4520e-02, PNorm = 112.9266, GNorm = 0.6714, lr_0 = 8.2140e-04
Loss = 6.0470e-02, PNorm = 113.0066, GNorm = 0.8106, lr_0 = 8.2084e-04
Loss = 5.3002e-02, PNorm = 113.0951, GNorm = 0.3025, lr_0 = 8.2027e-04
Loss = 5.2268e-02, PNorm = 113.1622, GNorm = 0.3635, lr_0 = 8.1971e-04
Loss = 6.0246e-02, PNorm = 113.2325, GNorm = 0.3487, lr_0 = 8.1915e-04
Loss = 6.1560e-02, PNorm = 113.3082, GNorm = 0.4025, lr_0 = 8.1859e-04
Loss = 5.5627e-02, PNorm = 113.3857, GNorm = 0.4565, lr_0 = 8.1803e-04
Loss = 5.9749e-02, PNorm = 113.4711, GNorm = 0.9393, lr_0 = 8.1747e-04
Loss = 5.5052e-02, PNorm = 113.5582, GNorm = 0.3669, lr_0 = 8.1691e-04
Loss = 5.1032e-02, PNorm = 113.6280, GNorm = 0.8581, lr_0 = 8.1635e-04
Loss = 5.2147e-02, PNorm = 113.7037, GNorm = 0.4460, lr_0 = 8.1579e-04
Loss = 6.0108e-02, PNorm = 113.7844, GNorm = 0.5796, lr_0 = 8.1523e-04
Loss = 5.8140e-02, PNorm = 113.8672, GNorm = 0.3847, lr_0 = 8.1467e-04
Loss = 5.3856e-02, PNorm = 113.9471, GNorm = 0.8244, lr_0 = 8.1411e-04
Loss = 6.0272e-02, PNorm = 114.0361, GNorm = 0.5909, lr_0 = 8.1355e-04
Loss = 6.2750e-02, PNorm = 114.1133, GNorm = 0.8619, lr_0 = 8.1300e-04
Loss = 5.6907e-02, PNorm = 114.2070, GNorm = 0.3060, lr_0 = 8.1244e-04
Loss = 6.1682e-02, PNorm = 114.2935, GNorm = 0.4318, lr_0 = 8.1188e-04
Loss = 6.3473e-02, PNorm = 114.3830, GNorm = 0.8031, lr_0 = 8.1133e-04
Loss = 6.5294e-02, PNorm = 114.4805, GNorm = 0.8682, lr_0 = 8.1077e-04
Loss = 6.2746e-02, PNorm = 114.5680, GNorm = 0.5131, lr_0 = 8.1022e-04
Loss = 5.7740e-02, PNorm = 114.6600, GNorm = 0.3663, lr_0 = 8.0966e-04
Loss = 5.9427e-02, PNorm = 114.7484, GNorm = 0.7455, lr_0 = 8.0911e-04
Loss = 6.0210e-02, PNorm = 114.8298, GNorm = 0.6867, lr_0 = 8.0855e-04
Loss = 6.1357e-02, PNorm = 114.9172, GNorm = 0.9683, lr_0 = 8.0800e-04
Loss = 5.6808e-02, PNorm = 115.0034, GNorm = 0.8189, lr_0 = 8.0745e-04
Loss = 6.3520e-02, PNorm = 115.0982, GNorm = 0.3375, lr_0 = 8.0689e-04
Loss = 6.6153e-02, PNorm = 115.1965, GNorm = 0.8695, lr_0 = 8.0634e-04
Loss = 6.6936e-02, PNorm = 115.2835, GNorm = 0.5600, lr_0 = 8.0579e-04
Loss = 5.6414e-02, PNorm = 115.3720, GNorm = 0.4190, lr_0 = 8.0523e-04
Loss = 6.0849e-02, PNorm = 115.4558, GNorm = 0.4922, lr_0 = 8.0468e-04
Loss = 7.2822e-02, PNorm = 115.5372, GNorm = 0.5880, lr_0 = 8.0413e-04
Loss = 6.2482e-02, PNorm = 115.6245, GNorm = 0.8750, lr_0 = 8.0358e-04
Loss = 6.4829e-02, PNorm = 115.7200, GNorm = 0.4457, lr_0 = 8.0303e-04
Loss = 5.8231e-02, PNorm = 115.8067, GNorm = 0.3490, lr_0 = 8.0248e-04
Loss = 6.3429e-02, PNorm = 115.8930, GNorm = 1.0235, lr_0 = 8.0193e-04
Loss = 6.3941e-02, PNorm = 115.9682, GNorm = 0.4431, lr_0 = 8.0138e-04
Loss = 6.0275e-02, PNorm = 116.0594, GNorm = 0.3938, lr_0 = 8.0083e-04
Loss = 5.3703e-02, PNorm = 116.1420, GNorm = 0.6922, lr_0 = 8.0028e-04
Loss = 7.2843e-02, PNorm = 116.2407, GNorm = 0.9699, lr_0 = 7.9974e-04
Loss = 5.4671e-02, PNorm = 116.3322, GNorm = 0.3564, lr_0 = 7.9919e-04
Loss = 6.6459e-02, PNorm = 116.4215, GNorm = 0.5025, lr_0 = 7.9864e-04
Loss = 7.1822e-02, PNorm = 116.5156, GNorm = 0.5646, lr_0 = 7.9809e-04
Loss = 6.4467e-02, PNorm = 116.6120, GNorm = 0.4325, lr_0 = 7.9755e-04
Loss = 6.3214e-02, PNorm = 116.7095, GNorm = 0.4278, lr_0 = 7.9700e-04
Loss = 7.0901e-02, PNorm = 116.8064, GNorm = 0.6374, lr_0 = 7.9645e-04
Loss = 6.7440e-02, PNorm = 116.9097, GNorm = 0.3514, lr_0 = 7.9591e-04
Loss = 7.6080e-02, PNorm = 117.0115, GNorm = 0.3964, lr_0 = 7.9536e-04
Loss = 5.7414e-02, PNorm = 117.1021, GNorm = 0.4266, lr_0 = 7.9482e-04
Loss = 5.8739e-02, PNorm = 117.1924, GNorm = 0.3054, lr_0 = 7.9427e-04
Loss = 6.5590e-02, PNorm = 117.2868, GNorm = 0.9883, lr_0 = 7.9373e-04
Loss = 6.8780e-02, PNorm = 117.3747, GNorm = 0.5427, lr_0 = 7.9319e-04
Loss = 7.6879e-02, PNorm = 117.4735, GNorm = 0.5857, lr_0 = 7.9264e-04
Loss = 6.6644e-02, PNorm = 117.5696, GNorm = 0.5392, lr_0 = 7.9210e-04
Loss = 6.7242e-02, PNorm = 117.6665, GNorm = 0.3612, lr_0 = 7.9156e-04
Loss = 6.1518e-02, PNorm = 117.7548, GNorm = 0.5591, lr_0 = 7.9101e-04
Loss = 6.4948e-02, PNorm = 117.8488, GNorm = 0.4181, lr_0 = 7.9047e-04
Loss = 7.1918e-02, PNorm = 117.9311, GNorm = 0.7210, lr_0 = 7.8993e-04
Loss = 7.4872e-02, PNorm = 118.0253, GNorm = 0.6998, lr_0 = 7.8939e-04
Loss = 6.8352e-02, PNorm = 118.1220, GNorm = 0.6362, lr_0 = 7.8885e-04
Loss = 7.4792e-02, PNorm = 118.2244, GNorm = 0.7036, lr_0 = 7.8831e-04
Loss = 6.4793e-02, PNorm = 118.3268, GNorm = 0.8090, lr_0 = 7.8777e-04
Loss = 7.5228e-02, PNorm = 118.4232, GNorm = 0.4295, lr_0 = 7.8723e-04
Loss = 6.9703e-02, PNorm = 118.5222, GNorm = 0.5778, lr_0 = 7.8669e-04
Loss = 7.6162e-02, PNorm = 118.6283, GNorm = 0.6561, lr_0 = 7.8615e-04
Loss = 6.6628e-02, PNorm = 118.7301, GNorm = 0.3156, lr_0 = 7.8561e-04
Loss = 7.3375e-02, PNorm = 118.8231, GNorm = 0.5642, lr_0 = 7.8507e-04
Loss = 7.5537e-02, PNorm = 118.9128, GNorm = 0.5263, lr_0 = 7.8454e-04
Loss = 7.4158e-02, PNorm = 119.0136, GNorm = 0.8215, lr_0 = 7.8400e-04
Loss = 7.2076e-02, PNorm = 119.1109, GNorm = 1.2877, lr_0 = 7.8346e-04
Loss = 8.1288e-02, PNorm = 119.2144, GNorm = 0.9792, lr_0 = 7.8293e-04
Loss = 7.3798e-02, PNorm = 119.3209, GNorm = 0.4411, lr_0 = 7.8239e-04
Loss = 6.9414e-02, PNorm = 119.4402, GNorm = 0.8652, lr_0 = 7.8185e-04
Loss = 7.1781e-02, PNorm = 119.5533, GNorm = 0.6654, lr_0 = 7.8132e-04
Validation mae = 0.288874
Epoch 5
Loss = 4.5028e-02, PNorm = 119.6498, GNorm = 0.2271, lr_0 = 7.8078e-04
Loss = 5.0865e-02, PNorm = 119.7307, GNorm = 1.1716, lr_0 = 7.8025e-04
Loss = 4.3761e-02, PNorm = 119.8012, GNorm = 0.5170, lr_0 = 7.7971e-04
Loss = 4.7891e-02, PNorm = 119.8573, GNorm = 0.6860, lr_0 = 7.7918e-04
Loss = 3.6067e-02, PNorm = 119.9110, GNorm = 0.6367, lr_0 = 7.7864e-04
Loss = 4.5030e-02, PNorm = 119.9702, GNorm = 0.4330, lr_0 = 7.7811e-04
Loss = 4.2689e-02, PNorm = 120.0207, GNorm = 0.4093, lr_0 = 7.7758e-04
Loss = 3.8831e-02, PNorm = 120.0785, GNorm = 0.5781, lr_0 = 7.7705e-04
Loss = 4.0112e-02, PNorm = 120.1316, GNorm = 0.5302, lr_0 = 7.7651e-04
Loss = 3.4578e-02, PNorm = 120.1851, GNorm = 0.6470, lr_0 = 7.7598e-04
Loss = 3.3603e-02, PNorm = 120.2389, GNorm = 0.3633, lr_0 = 7.7545e-04
Loss = 4.6550e-02, PNorm = 120.2931, GNorm = 0.5966, lr_0 = 7.7492e-04
Loss = 3.7470e-02, PNorm = 120.3524, GNorm = 0.3795, lr_0 = 7.7439e-04
Loss = 3.7935e-02, PNorm = 120.4088, GNorm = 0.4684, lr_0 = 7.7386e-04
Loss = 3.6770e-02, PNorm = 120.4635, GNorm = 0.3221, lr_0 = 7.7333e-04
Loss = 4.6682e-02, PNorm = 120.5156, GNorm = 0.8675, lr_0 = 7.7280e-04
Loss = 5.0973e-02, PNorm = 120.5764, GNorm = 0.6579, lr_0 = 7.7227e-04
Loss = 4.0913e-02, PNorm = 120.6330, GNorm = 0.5658, lr_0 = 7.7174e-04
Loss = 4.2247e-02, PNorm = 120.6919, GNorm = 0.3658, lr_0 = 7.7121e-04
Loss = 4.1469e-02, PNorm = 120.7493, GNorm = 0.4147, lr_0 = 7.7068e-04
Loss = 3.9309e-02, PNorm = 120.8203, GNorm = 0.3334, lr_0 = 7.7015e-04
Loss = 4.4874e-02, PNorm = 120.8790, GNorm = 0.5651, lr_0 = 7.6963e-04
Loss = 4.8629e-02, PNorm = 120.9487, GNorm = 0.5595, lr_0 = 7.6910e-04
Loss = 4.2108e-02, PNorm = 121.0150, GNorm = 0.3340, lr_0 = 7.6857e-04
Loss = 4.3027e-02, PNorm = 121.0859, GNorm = 0.7013, lr_0 = 7.6805e-04
Loss = 4.8767e-02, PNorm = 121.1498, GNorm = 0.7177, lr_0 = 7.6752e-04
Loss = 4.4060e-02, PNorm = 121.2231, GNorm = 0.2377, lr_0 = 7.6699e-04
Loss = 4.0448e-02, PNorm = 121.2869, GNorm = 0.5395, lr_0 = 7.6647e-04
Loss = 4.3594e-02, PNorm = 121.3653, GNorm = 0.3081, lr_0 = 7.6594e-04
Loss = 4.5593e-02, PNorm = 121.4281, GNorm = 0.4674, lr_0 = 7.6542e-04
Loss = 4.2957e-02, PNorm = 121.4970, GNorm = 0.3732, lr_0 = 7.6489e-04
Loss = 4.4398e-02, PNorm = 121.5666, GNorm = 0.8310, lr_0 = 7.6437e-04
Loss = 3.9612e-02, PNorm = 121.6359, GNorm = 0.4524, lr_0 = 7.6385e-04
Loss = 4.3191e-02, PNorm = 121.7069, GNorm = 0.2926, lr_0 = 7.6332e-04
Loss = 3.8943e-02, PNorm = 121.7697, GNorm = 0.3390, lr_0 = 7.6280e-04
Loss = 4.6349e-02, PNorm = 121.8364, GNorm = 0.2701, lr_0 = 7.6228e-04
Loss = 4.7992e-02, PNorm = 121.8958, GNorm = 0.3795, lr_0 = 7.6176e-04
Loss = 5.1659e-02, PNorm = 121.9738, GNorm = 0.6842, lr_0 = 7.6123e-04
Loss = 4.8818e-02, PNorm = 122.0363, GNorm = 0.3582, lr_0 = 7.6071e-04
Loss = 4.6444e-02, PNorm = 122.1123, GNorm = 0.5007, lr_0 = 7.6019e-04
Loss = 3.9219e-02, PNorm = 122.1812, GNorm = 0.4573, lr_0 = 7.5967e-04
Loss = 4.2134e-02, PNorm = 122.2430, GNorm = 0.5265, lr_0 = 7.5915e-04
Loss = 4.6230e-02, PNorm = 122.3087, GNorm = 0.3976, lr_0 = 7.5863e-04
Loss = 4.5632e-02, PNorm = 122.3823, GNorm = 0.6968, lr_0 = 7.5811e-04
Loss = 4.1691e-02, PNorm = 122.4459, GNorm = 0.2804, lr_0 = 7.5759e-04
Loss = 4.4480e-02, PNorm = 122.5128, GNorm = 0.3263, lr_0 = 7.5707e-04
Loss = 3.9030e-02, PNorm = 122.5754, GNorm = 0.2505, lr_0 = 7.5655e-04
Loss = 3.5523e-02, PNorm = 122.6405, GNorm = 0.3905, lr_0 = 7.5603e-04
Loss = 3.9801e-02, PNorm = 122.6983, GNorm = 0.8445, lr_0 = 7.5552e-04
Loss = 4.4487e-02, PNorm = 122.7573, GNorm = 0.2842, lr_0 = 7.5500e-04
Loss = 4.7082e-02, PNorm = 122.8215, GNorm = 0.6259, lr_0 = 7.5448e-04
Loss = 4.7337e-02, PNorm = 122.8977, GNorm = 0.7042, lr_0 = 7.5397e-04
Loss = 5.2710e-02, PNorm = 122.9809, GNorm = 0.3888, lr_0 = 7.5345e-04
Loss = 4.2907e-02, PNorm = 123.0619, GNorm = 0.4404, lr_0 = 7.5293e-04
Loss = 4.5538e-02, PNorm = 123.1277, GNorm = 0.4809, lr_0 = 7.5242e-04
Loss = 3.8931e-02, PNorm = 123.2027, GNorm = 1.1996, lr_0 = 7.5190e-04
Loss = 4.6694e-02, PNorm = 123.2717, GNorm = 0.4268, lr_0 = 7.5139e-04
Loss = 3.8796e-02, PNorm = 123.3415, GNorm = 0.3209, lr_0 = 7.5087e-04
Loss = 4.4122e-02, PNorm = 123.4081, GNorm = 0.5049, lr_0 = 7.5036e-04
Loss = 4.1655e-02, PNorm = 123.4749, GNorm = 0.4725, lr_0 = 7.4984e-04
Loss = 4.4998e-02, PNorm = 123.5459, GNorm = 0.6972, lr_0 = 7.4933e-04
Loss = 4.4592e-02, PNorm = 123.6175, GNorm = 0.4765, lr_0 = 7.4882e-04
Loss = 4.7358e-02, PNorm = 123.6818, GNorm = 0.8217, lr_0 = 7.4830e-04
Loss = 4.0365e-02, PNorm = 123.7487, GNorm = 0.4306, lr_0 = 7.4779e-04
Loss = 4.0214e-02, PNorm = 123.8151, GNorm = 0.4239, lr_0 = 7.4728e-04
Loss = 4.2561e-02, PNorm = 123.8851, GNorm = 0.6644, lr_0 = 7.4677e-04
Loss = 4.3152e-02, PNorm = 123.9541, GNorm = 0.5198, lr_0 = 7.4625e-04
Loss = 3.9274e-02, PNorm = 124.0135, GNorm = 0.3582, lr_0 = 7.4574e-04
Loss = 4.7341e-02, PNorm = 124.0844, GNorm = 0.9778, lr_0 = 7.4523e-04
Loss = 3.5442e-02, PNorm = 124.1484, GNorm = 0.3572, lr_0 = 7.4472e-04
Loss = 4.1347e-02, PNorm = 124.2278, GNorm = 0.4675, lr_0 = 7.4421e-04
Loss = 4.2646e-02, PNorm = 124.3037, GNorm = 0.4617, lr_0 = 7.4370e-04
Loss = 4.5220e-02, PNorm = 124.3824, GNorm = 0.6715, lr_0 = 7.4319e-04
Loss = 4.6368e-02, PNorm = 124.4564, GNorm = 1.1252, lr_0 = 7.4268e-04
Loss = 4.0364e-02, PNorm = 124.5333, GNorm = 0.3259, lr_0 = 7.4217e-04
Loss = 4.3944e-02, PNorm = 124.6113, GNorm = 0.9730, lr_0 = 7.4167e-04
Loss = 4.9206e-02, PNorm = 124.6866, GNorm = 0.2446, lr_0 = 7.4116e-04
Loss = 3.8278e-02, PNorm = 124.7604, GNorm = 0.4323, lr_0 = 7.4065e-04
Loss = 5.1717e-02, PNorm = 124.8186, GNorm = 0.9224, lr_0 = 7.4014e-04
Loss = 4.8262e-02, PNorm = 124.8988, GNorm = 0.6215, lr_0 = 7.3964e-04
Loss = 5.0529e-02, PNorm = 124.9784, GNorm = 0.5461, lr_0 = 7.3913e-04
Loss = 5.1380e-02, PNorm = 125.0575, GNorm = 0.3730, lr_0 = 7.3862e-04
Loss = 4.6318e-02, PNorm = 125.1233, GNorm = 0.7005, lr_0 = 7.3812e-04
Loss = 4.4029e-02, PNorm = 125.1938, GNorm = 0.9196, lr_0 = 7.3761e-04
Loss = 4.9322e-02, PNorm = 125.2710, GNorm = 0.5103, lr_0 = 7.3711e-04
Loss = 4.7917e-02, PNorm = 125.3445, GNorm = 0.4471, lr_0 = 7.3660e-04
Loss = 4.0562e-02, PNorm = 125.4190, GNorm = 0.4887, lr_0 = 7.3610e-04
Loss = 4.3598e-02, PNorm = 125.4888, GNorm = 0.4144, lr_0 = 7.3559e-04
Loss = 3.8873e-02, PNorm = 125.5559, GNorm = 0.4247, lr_0 = 7.3509e-04
Loss = 4.6973e-02, PNorm = 125.6243, GNorm = 0.3014, lr_0 = 7.3458e-04
Loss = 4.4278e-02, PNorm = 125.7042, GNorm = 0.2639, lr_0 = 7.3408e-04
Loss = 5.6508e-02, PNorm = 125.7830, GNorm = 0.6379, lr_0 = 7.3358e-04
Loss = 4.5374e-02, PNorm = 125.8656, GNorm = 0.4006, lr_0 = 7.3308e-04
Loss = 4.6723e-02, PNorm = 125.9454, GNorm = 0.7103, lr_0 = 7.3257e-04
Loss = 4.6337e-02, PNorm = 126.0230, GNorm = 0.2458, lr_0 = 7.3207e-04
Loss = 3.9969e-02, PNorm = 126.0991, GNorm = 0.3082, lr_0 = 7.3157e-04
Loss = 4.5778e-02, PNorm = 126.1794, GNorm = 0.4683, lr_0 = 7.3107e-04
Loss = 4.8604e-02, PNorm = 126.2486, GNorm = 0.4652, lr_0 = 7.3057e-04
Loss = 4.7352e-02, PNorm = 126.3269, GNorm = 0.4021, lr_0 = 7.3007e-04
Loss = 4.3945e-02, PNorm = 126.4041, GNorm = 0.4020, lr_0 = 7.2957e-04
Loss = 4.8716e-02, PNorm = 126.4834, GNorm = 0.2742, lr_0 = 7.2907e-04
Loss = 3.9377e-02, PNorm = 126.5653, GNorm = 0.5572, lr_0 = 7.2857e-04
Loss = 5.0504e-02, PNorm = 126.6427, GNorm = 0.5420, lr_0 = 7.2807e-04
Loss = 3.7342e-02, PNorm = 126.7256, GNorm = 0.4422, lr_0 = 7.2757e-04
Loss = 5.0630e-02, PNorm = 126.8063, GNorm = 0.5647, lr_0 = 7.2707e-04
Loss = 4.6360e-02, PNorm = 126.8867, GNorm = 0.4510, lr_0 = 7.2657e-04
Loss = 4.5317e-02, PNorm = 126.9683, GNorm = 0.3113, lr_0 = 7.2608e-04
Loss = 4.7759e-02, PNorm = 127.0451, GNorm = 0.5632, lr_0 = 7.2558e-04
Loss = 4.4395e-02, PNorm = 127.1282, GNorm = 0.3843, lr_0 = 7.2508e-04
Loss = 4.4387e-02, PNorm = 127.1943, GNorm = 0.3990, lr_0 = 7.2458e-04
Loss = 5.1021e-02, PNorm = 127.2625, GNorm = 0.2962, lr_0 = 7.2409e-04
Loss = 4.8263e-02, PNorm = 127.3366, GNorm = 0.4533, lr_0 = 7.2359e-04
Loss = 4.9721e-02, PNorm = 127.4201, GNorm = 0.9076, lr_0 = 7.2310e-04
Loss = 5.2512e-02, PNorm = 127.5058, GNorm = 0.7888, lr_0 = 7.2260e-04
Loss = 4.0043e-02, PNorm = 127.5904, GNorm = 0.4851, lr_0 = 7.2211e-04
Loss = 4.6637e-02, PNorm = 127.6804, GNorm = 1.1278, lr_0 = 7.2161e-04
Loss = 5.1268e-02, PNorm = 127.7672, GNorm = 0.4132, lr_0 = 7.2112e-04
Loss = 4.8956e-02, PNorm = 127.8560, GNorm = 0.8706, lr_0 = 7.2062e-04
Loss = 5.2081e-02, PNorm = 127.9440, GNorm = 0.7034, lr_0 = 7.2013e-04
Loss = 6.1440e-02, PNorm = 128.0370, GNorm = 0.3104, lr_0 = 7.1964e-04
Validation mae = 0.287317
Epoch 6
Loss = 3.8766e-02, PNorm = 128.1181, GNorm = 0.9244, lr_0 = 7.1914e-04
Loss = 3.5041e-02, PNorm = 128.1840, GNorm = 0.5979, lr_0 = 7.1865e-04
Loss = 3.7505e-02, PNorm = 128.2351, GNorm = 0.8670, lr_0 = 7.1816e-04
Loss = 4.0053e-02, PNorm = 128.3023, GNorm = 0.2682, lr_0 = 7.1767e-04
Loss = 3.2441e-02, PNorm = 128.3515, GNorm = 0.2994, lr_0 = 7.1717e-04
Loss = 4.0449e-02, PNorm = 128.4053, GNorm = 1.2957, lr_0 = 7.1668e-04
Loss = 3.5022e-02, PNorm = 128.4671, GNorm = 0.2941, lr_0 = 7.1619e-04
Loss = 3.7235e-02, PNorm = 128.5197, GNorm = 0.1869, lr_0 = 7.1570e-04
Loss = 3.7541e-02, PNorm = 128.5808, GNorm = 0.8967, lr_0 = 7.1521e-04
Loss = 3.6159e-02, PNorm = 128.6173, GNorm = 0.1943, lr_0 = 7.1472e-04
Loss = 3.3968e-02, PNorm = 128.6747, GNorm = 0.6512, lr_0 = 7.1423e-04
Loss = 3.0699e-02, PNorm = 128.7227, GNorm = 0.2903, lr_0 = 7.1374e-04
Loss = 3.4767e-02, PNorm = 128.7816, GNorm = 0.3684, lr_0 = 7.1325e-04
Loss = 3.7000e-02, PNorm = 128.8330, GNorm = 0.5281, lr_0 = 7.1277e-04
Loss = 3.7581e-02, PNorm = 128.8973, GNorm = 0.8785, lr_0 = 7.1228e-04
Loss = 3.3096e-02, PNorm = 128.9541, GNorm = 0.2293, lr_0 = 7.1179e-04
Loss = 2.9722e-02, PNorm = 129.0028, GNorm = 0.4587, lr_0 = 7.1130e-04
Loss = 3.9552e-02, PNorm = 129.0575, GNorm = 0.4845, lr_0 = 7.1081e-04
Loss = 3.4302e-02, PNorm = 129.1156, GNorm = 0.3299, lr_0 = 7.1033e-04
Loss = 2.9666e-02, PNorm = 129.1710, GNorm = 0.4850, lr_0 = 7.0984e-04
Loss = 3.1456e-02, PNorm = 129.2253, GNorm = 0.4321, lr_0 = 7.0935e-04
Loss = 2.9428e-02, PNorm = 129.2852, GNorm = 0.3448, lr_0 = 7.0887e-04
Loss = 2.8628e-02, PNorm = 129.3391, GNorm = 0.2195, lr_0 = 7.0838e-04
Loss = 3.3108e-02, PNorm = 129.3913, GNorm = 0.4233, lr_0 = 7.0790e-04
Loss = 3.6261e-02, PNorm = 129.4489, GNorm = 0.2844, lr_0 = 7.0741e-04
Loss = 3.6780e-02, PNorm = 129.5046, GNorm = 0.5329, lr_0 = 7.0693e-04
Loss = 3.4215e-02, PNorm = 129.5568, GNorm = 0.3874, lr_0 = 7.0644e-04
Loss = 3.1662e-02, PNorm = 129.6092, GNorm = 0.3502, lr_0 = 7.0596e-04
Loss = 2.9600e-02, PNorm = 129.6667, GNorm = 0.4187, lr_0 = 7.0548e-04
Loss = 3.1099e-02, PNorm = 129.7175, GNorm = 0.6328, lr_0 = 7.0499e-04
Loss = 3.0312e-02, PNorm = 129.7778, GNorm = 0.3331, lr_0 = 7.0451e-04
Loss = 3.2624e-02, PNorm = 129.8272, GNorm = 0.5241, lr_0 = 7.0403e-04
Loss = 3.3055e-02, PNorm = 129.8794, GNorm = 0.9790, lr_0 = 7.0354e-04
Loss = 3.3002e-02, PNorm = 129.9303, GNorm = 0.5326, lr_0 = 7.0306e-04
Loss = 2.9763e-02, PNorm = 129.9771, GNorm = 0.5082, lr_0 = 7.0258e-04
Loss = 3.5256e-02, PNorm = 130.0252, GNorm = 0.4391, lr_0 = 7.0210e-04
Loss = 2.7467e-02, PNorm = 130.0851, GNorm = 0.7713, lr_0 = 7.0162e-04
Loss = 3.5595e-02, PNorm = 130.1426, GNorm = 0.3994, lr_0 = 7.0114e-04
Loss = 3.3693e-02, PNorm = 130.1984, GNorm = 0.9103, lr_0 = 7.0066e-04
Loss = 3.0751e-02, PNorm = 130.2521, GNorm = 0.2050, lr_0 = 7.0018e-04
Loss = 3.3424e-02, PNorm = 130.3060, GNorm = 0.7646, lr_0 = 6.9970e-04
Loss = 2.7561e-02, PNorm = 130.3664, GNorm = 0.2492, lr_0 = 6.9922e-04
Loss = 4.0927e-02, PNorm = 130.4156, GNorm = 0.6392, lr_0 = 6.9874e-04
Loss = 3.4474e-02, PNorm = 130.4794, GNorm = 0.2961, lr_0 = 6.9826e-04
Loss = 3.1060e-02, PNorm = 130.5441, GNorm = 0.2314, lr_0 = 6.9778e-04
Loss = 4.0682e-02, PNorm = 130.6109, GNorm = 0.3050, lr_0 = 6.9730e-04
Loss = 3.4837e-02, PNorm = 130.6704, GNorm = 0.8278, lr_0 = 6.9683e-04
Loss = 3.2363e-02, PNorm = 130.7428, GNorm = 0.3876, lr_0 = 6.9635e-04
Loss = 3.2972e-02, PNorm = 130.8062, GNorm = 0.5018, lr_0 = 6.9587e-04
Loss = 3.2929e-02, PNorm = 130.8659, GNorm = 0.3377, lr_0 = 6.9540e-04
Loss = 3.1959e-02, PNorm = 130.9246, GNorm = 0.3975, lr_0 = 6.9492e-04
Loss = 2.8894e-02, PNorm = 130.9792, GNorm = 0.2455, lr_0 = 6.9444e-04
Loss = 2.8420e-02, PNorm = 131.0334, GNorm = 0.2787, lr_0 = 6.9397e-04
Loss = 3.3578e-02, PNorm = 131.0878, GNorm = 0.5952, lr_0 = 6.9349e-04
Loss = 3.3879e-02, PNorm = 131.1442, GNorm = 0.2896, lr_0 = 6.9302e-04
Loss = 3.7171e-02, PNorm = 131.2067, GNorm = 0.3049, lr_0 = 6.9254e-04
Loss = 3.3025e-02, PNorm = 131.2700, GNorm = 0.2055, lr_0 = 6.9207e-04
Loss = 2.8695e-02, PNorm = 131.3349, GNorm = 0.5365, lr_0 = 6.9159e-04
Loss = 3.2406e-02, PNorm = 131.3920, GNorm = 0.6783, lr_0 = 6.9112e-04
Loss = 3.2377e-02, PNorm = 131.4560, GNorm = 0.5499, lr_0 = 6.9065e-04
Loss = 2.7299e-02, PNorm = 131.5148, GNorm = 0.2578, lr_0 = 6.9017e-04
Loss = 3.1128e-02, PNorm = 131.5709, GNorm = 0.3767, lr_0 = 6.8970e-04
Loss = 3.1818e-02, PNorm = 131.6249, GNorm = 0.4585, lr_0 = 6.8923e-04
Loss = 3.3675e-02, PNorm = 131.6838, GNorm = 0.2892, lr_0 = 6.8876e-04
Loss = 3.3966e-02, PNorm = 131.7379, GNorm = 0.5326, lr_0 = 6.8828e-04
Loss = 2.5494e-02, PNorm = 131.7970, GNorm = 0.9176, lr_0 = 6.8781e-04
Loss = 3.0672e-02, PNorm = 131.8536, GNorm = 0.7891, lr_0 = 6.8734e-04
Loss = 3.6896e-02, PNorm = 131.9120, GNorm = 0.5926, lr_0 = 6.8687e-04
Loss = 3.2090e-02, PNorm = 131.9743, GNorm = 0.3017, lr_0 = 6.8640e-04
Loss = 3.5305e-02, PNorm = 132.0346, GNorm = 1.0311, lr_0 = 6.8593e-04
Loss = 3.5178e-02, PNorm = 132.0967, GNorm = 0.6663, lr_0 = 6.8546e-04
Loss = 3.1186e-02, PNorm = 132.1604, GNorm = 0.5600, lr_0 = 6.8499e-04
Loss = 2.9648e-02, PNorm = 132.2235, GNorm = 0.3487, lr_0 = 6.8452e-04
Loss = 3.2923e-02, PNorm = 132.2849, GNorm = 0.3072, lr_0 = 6.8405e-04
Loss = 3.5971e-02, PNorm = 132.3577, GNorm = 0.2760, lr_0 = 6.8358e-04
Loss = 3.9108e-02, PNorm = 132.4185, GNorm = 0.8743, lr_0 = 6.8312e-04
Loss = 3.0002e-02, PNorm = 132.4914, GNorm = 0.3961, lr_0 = 6.8265e-04
Loss = 3.7041e-02, PNorm = 132.5522, GNorm = 0.8296, lr_0 = 6.8218e-04
Loss = 3.3731e-02, PNorm = 132.6242, GNorm = 0.3302, lr_0 = 6.8171e-04
Loss = 3.6513e-02, PNorm = 132.6937, GNorm = 0.3813, lr_0 = 6.8125e-04
Loss = 3.8859e-02, PNorm = 132.7629, GNorm = 0.5390, lr_0 = 6.8078e-04
Loss = 3.7736e-02, PNorm = 132.8285, GNorm = 0.6420, lr_0 = 6.8031e-04
Loss = 3.2950e-02, PNorm = 132.8911, GNorm = 0.4950, lr_0 = 6.7985e-04
Loss = 3.9389e-02, PNorm = 132.9518, GNorm = 0.1893, lr_0 = 6.7938e-04
Loss = 3.3216e-02, PNorm = 133.0152, GNorm = 0.2528, lr_0 = 6.7892e-04
Loss = 3.0048e-02, PNorm = 133.0822, GNorm = 0.3280, lr_0 = 6.7845e-04
Loss = 3.4873e-02, PNorm = 133.1480, GNorm = 0.3135, lr_0 = 6.7799e-04
Loss = 2.9348e-02, PNorm = 133.2158, GNorm = 0.2571, lr_0 = 6.7752e-04
Loss = 4.4420e-02, PNorm = 133.2844, GNorm = 0.2367, lr_0 = 6.7706e-04
Loss = 3.9197e-02, PNorm = 133.3583, GNorm = 0.2190, lr_0 = 6.7659e-04
Loss = 4.0993e-02, PNorm = 133.4275, GNorm = 0.3730, lr_0 = 6.7613e-04
Loss = 3.4111e-02, PNorm = 133.4946, GNorm = 0.4277, lr_0 = 6.7567e-04
Loss = 2.8667e-02, PNorm = 133.5605, GNorm = 0.2391, lr_0 = 6.7520e-04
Loss = 3.3224e-02, PNorm = 133.6244, GNorm = 0.4496, lr_0 = 6.7474e-04
Loss = 3.3769e-02, PNorm = 133.6943, GNorm = 0.8315, lr_0 = 6.7428e-04
Loss = 3.8291e-02, PNorm = 133.7584, GNorm = 0.4108, lr_0 = 6.7382e-04
Loss = 3.4041e-02, PNorm = 133.8304, GNorm = 0.3658, lr_0 = 6.7335e-04
Loss = 3.6688e-02, PNorm = 133.9008, GNorm = 0.3638, lr_0 = 6.7289e-04
Loss = 4.1352e-02, PNorm = 133.9729, GNorm = 0.4516, lr_0 = 6.7243e-04
Loss = 3.6008e-02, PNorm = 134.0418, GNorm = 0.3711, lr_0 = 6.7197e-04
Loss = 3.4482e-02, PNorm = 134.1131, GNorm = 0.4418, lr_0 = 6.7151e-04
Loss = 3.0799e-02, PNorm = 134.1858, GNorm = 0.4178, lr_0 = 6.7105e-04
Loss = 3.5236e-02, PNorm = 134.2551, GNorm = 0.5778, lr_0 = 6.7059e-04
Loss = 3.0843e-02, PNorm = 134.3242, GNorm = 0.3896, lr_0 = 6.7013e-04
Loss = 4.0325e-02, PNorm = 134.3982, GNorm = 0.5395, lr_0 = 6.6967e-04
Loss = 3.4493e-02, PNorm = 134.4736, GNorm = 0.5593, lr_0 = 6.6921e-04
Loss = 4.1850e-02, PNorm = 134.5486, GNorm = 0.5117, lr_0 = 6.6876e-04
Loss = 3.8566e-02, PNorm = 134.6247, GNorm = 0.4282, lr_0 = 6.6830e-04
Loss = 4.0655e-02, PNorm = 134.7021, GNorm = 0.4556, lr_0 = 6.6784e-04
Loss = 3.5937e-02, PNorm = 134.7802, GNorm = 1.0265, lr_0 = 6.6738e-04
Loss = 3.5536e-02, PNorm = 134.8467, GNorm = 0.5471, lr_0 = 6.6693e-04
Loss = 3.7900e-02, PNorm = 134.9140, GNorm = 0.5923, lr_0 = 6.6647e-04
Loss = 3.7395e-02, PNorm = 134.9787, GNorm = 0.6781, lr_0 = 6.6601e-04
Loss = 4.8108e-02, PNorm = 135.0586, GNorm = 0.4838, lr_0 = 6.6556e-04
Loss = 4.2651e-02, PNorm = 135.1394, GNorm = 0.7285, lr_0 = 6.6510e-04
Loss = 3.8540e-02, PNorm = 135.2209, GNorm = 0.5729, lr_0 = 6.6464e-04
Loss = 3.9679e-02, PNorm = 135.2977, GNorm = 0.4837, lr_0 = 6.6419e-04
Loss = 3.9571e-02, PNorm = 135.3799, GNorm = 0.5254, lr_0 = 6.6373e-04
Loss = 4.5974e-02, PNorm = 135.4555, GNorm = 1.5130, lr_0 = 6.6328e-04
Loss = 3.7963e-02, PNorm = 135.5360, GNorm = 0.3419, lr_0 = 6.6282e-04
Validation mae = 0.286745
Epoch 7
Loss = 3.2871e-02, PNorm = 135.5979, GNorm = 0.4083, lr_0 = 6.6237e-04
Loss = 3.3292e-02, PNorm = 135.6565, GNorm = 0.5916, lr_0 = 6.6192e-04
Loss = 2.9491e-02, PNorm = 135.7138, GNorm = 0.6563, lr_0 = 6.6146e-04
Loss = 2.7160e-02, PNorm = 135.7675, GNorm = 0.3512, lr_0 = 6.6101e-04
Loss = 3.3874e-02, PNorm = 135.8168, GNorm = 0.4408, lr_0 = 6.6056e-04
Loss = 2.9158e-02, PNorm = 135.8645, GNorm = 0.3947, lr_0 = 6.6011e-04
Loss = 2.5961e-02, PNorm = 135.9087, GNorm = 0.4936, lr_0 = 6.5965e-04
Loss = 2.6258e-02, PNorm = 135.9529, GNorm = 0.7715, lr_0 = 6.5920e-04
Loss = 2.4661e-02, PNorm = 135.9932, GNorm = 0.5467, lr_0 = 6.5875e-04
Loss = 2.5874e-02, PNorm = 136.0371, GNorm = 0.4585, lr_0 = 6.5830e-04
Loss = 2.5602e-02, PNorm = 136.0781, GNorm = 0.2718, lr_0 = 6.5785e-04
Loss = 2.4008e-02, PNorm = 136.1199, GNorm = 0.3140, lr_0 = 6.5740e-04
Loss = 3.1452e-02, PNorm = 136.1652, GNorm = 0.3901, lr_0 = 6.5695e-04
Loss = 2.5058e-02, PNorm = 136.2144, GNorm = 0.6607, lr_0 = 6.5650e-04
Loss = 2.5373e-02, PNorm = 136.2596, GNorm = 0.3453, lr_0 = 6.5605e-04
Loss = 2.7351e-02, PNorm = 136.3073, GNorm = 0.2567, lr_0 = 6.5560e-04
Loss = 2.8657e-02, PNorm = 136.3520, GNorm = 0.3859, lr_0 = 6.5515e-04
Loss = 2.6004e-02, PNorm = 136.3954, GNorm = 0.3273, lr_0 = 6.5470e-04
Loss = 2.7015e-02, PNorm = 136.4296, GNorm = 0.6454, lr_0 = 6.5425e-04
Loss = 2.6507e-02, PNorm = 136.4800, GNorm = 0.4179, lr_0 = 6.5380e-04
Loss = 2.5981e-02, PNorm = 136.5279, GNorm = 0.5233, lr_0 = 6.5335e-04
Loss = 2.5498e-02, PNorm = 136.5808, GNorm = 0.4260, lr_0 = 6.5291e-04
Loss = 2.3666e-02, PNorm = 136.6281, GNorm = 0.2536, lr_0 = 6.5246e-04
Loss = 2.6490e-02, PNorm = 136.6803, GNorm = 0.5502, lr_0 = 6.5201e-04
Loss = 3.0487e-02, PNorm = 136.7264, GNorm = 0.1866, lr_0 = 6.5157e-04
Loss = 2.7900e-02, PNorm = 136.7767, GNorm = 0.5468, lr_0 = 6.5112e-04
Loss = 2.6641e-02, PNorm = 136.8224, GNorm = 0.4153, lr_0 = 6.5067e-04
Loss = 2.7148e-02, PNorm = 136.8739, GNorm = 0.3482, lr_0 = 6.5023e-04
Loss = 2.6181e-02, PNorm = 136.9162, GNorm = 0.4542, lr_0 = 6.4978e-04
Loss = 2.6190e-02, PNorm = 136.9671, GNorm = 0.3707, lr_0 = 6.4934e-04
Loss = 2.4261e-02, PNorm = 137.0151, GNorm = 0.1941, lr_0 = 6.4889e-04
Loss = 2.6747e-02, PNorm = 137.0601, GNorm = 0.3461, lr_0 = 6.4845e-04
Loss = 2.6517e-02, PNorm = 137.1057, GNorm = 0.2371, lr_0 = 6.4800e-04
Loss = 2.2601e-02, PNorm = 137.1520, GNorm = 0.7848, lr_0 = 6.4756e-04
Loss = 2.5546e-02, PNorm = 137.1935, GNorm = 0.4017, lr_0 = 6.4712e-04
Loss = 3.0203e-02, PNorm = 137.2374, GNorm = 0.5678, lr_0 = 6.4667e-04
Loss = 2.5844e-02, PNorm = 137.2902, GNorm = 0.4273, lr_0 = 6.4623e-04
Loss = 2.3765e-02, PNorm = 137.3370, GNorm = 0.1558, lr_0 = 6.4579e-04
Loss = 2.5104e-02, PNorm = 137.3850, GNorm = 0.3044, lr_0 = 6.4534e-04
Loss = 2.8680e-02, PNorm = 137.4266, GNorm = 0.3910, lr_0 = 6.4490e-04
Loss = 3.4126e-02, PNorm = 137.4743, GNorm = 0.2070, lr_0 = 6.4446e-04
Loss = 2.4763e-02, PNorm = 137.5209, GNorm = 0.4716, lr_0 = 6.4402e-04
Loss = 2.7863e-02, PNorm = 137.5723, GNorm = 0.6747, lr_0 = 6.4358e-04
Loss = 2.5028e-02, PNorm = 137.6230, GNorm = 0.1864, lr_0 = 6.4314e-04
Loss = 2.5598e-02, PNorm = 137.6737, GNorm = 0.4261, lr_0 = 6.4270e-04
Loss = 2.3876e-02, PNorm = 137.7256, GNorm = 0.1982, lr_0 = 6.4226e-04
Loss = 2.6373e-02, PNorm = 137.7755, GNorm = 0.3679, lr_0 = 6.4182e-04
Loss = 2.7710e-02, PNorm = 137.8321, GNorm = 0.2799, lr_0 = 6.4138e-04
Loss = 3.0252e-02, PNorm = 137.8834, GNorm = 0.4628, lr_0 = 6.4094e-04
Loss = 2.4952e-02, PNorm = 137.9389, GNorm = 0.3594, lr_0 = 6.4050e-04
Loss = 2.9457e-02, PNorm = 137.9826, GNorm = 0.2002, lr_0 = 6.4006e-04
Loss = 2.5188e-02, PNorm = 138.0391, GNorm = 0.4400, lr_0 = 6.3962e-04
Loss = 2.7727e-02, PNorm = 138.0870, GNorm = 0.4211, lr_0 = 6.3918e-04
Loss = 2.9419e-02, PNorm = 138.1388, GNorm = 0.9004, lr_0 = 6.3874e-04
Loss = 2.4331e-02, PNorm = 138.1964, GNorm = 0.8235, lr_0 = 6.3831e-04
Loss = 2.7114e-02, PNorm = 138.2479, GNorm = 0.8000, lr_0 = 6.3787e-04
Loss = 2.6450e-02, PNorm = 138.2976, GNorm = 0.1928, lr_0 = 6.3743e-04
Loss = 2.6958e-02, PNorm = 138.3528, GNorm = 0.2066, lr_0 = 6.3700e-04
Loss = 2.9205e-02, PNorm = 138.4112, GNorm = 0.6243, lr_0 = 6.3656e-04
Loss = 2.8635e-02, PNorm = 138.4689, GNorm = 0.5287, lr_0 = 6.3612e-04
Loss = 2.5665e-02, PNorm = 138.5190, GNorm = 0.4312, lr_0 = 6.3569e-04
Loss = 2.5572e-02, PNorm = 138.5645, GNorm = 0.3908, lr_0 = 6.3525e-04
Loss = 3.0266e-02, PNorm = 138.6110, GNorm = 0.5223, lr_0 = 6.3482e-04
Loss = 2.8344e-02, PNorm = 138.6624, GNorm = 0.6911, lr_0 = 6.3438e-04
Loss = 3.0034e-02, PNorm = 138.7176, GNorm = 0.2710, lr_0 = 6.3395e-04
Loss = 2.6394e-02, PNorm = 138.7721, GNorm = 1.4781, lr_0 = 6.3351e-04
Loss = 2.9036e-02, PNorm = 138.8263, GNorm = 0.5144, lr_0 = 6.3308e-04
Loss = 2.6063e-02, PNorm = 138.8885, GNorm = 0.1729, lr_0 = 6.3265e-04
Loss = 2.3493e-02, PNorm = 138.9463, GNorm = 0.4200, lr_0 = 6.3221e-04
Loss = 2.8600e-02, PNorm = 139.0006, GNorm = 0.3338, lr_0 = 6.3178e-04
Loss = 2.4460e-02, PNorm = 139.0530, GNorm = 0.2394, lr_0 = 6.3135e-04
Loss = 2.8219e-02, PNorm = 139.1144, GNorm = 0.2540, lr_0 = 6.3091e-04
Loss = 2.2825e-02, PNorm = 139.1705, GNorm = 0.3988, lr_0 = 6.3048e-04
Loss = 3.2216e-02, PNorm = 139.2262, GNorm = 0.3772, lr_0 = 6.3005e-04
Loss = 3.6474e-02, PNorm = 139.2823, GNorm = 0.6234, lr_0 = 6.2962e-04
Loss = 2.5973e-02, PNorm = 139.3408, GNorm = 0.4633, lr_0 = 6.2919e-04
Loss = 2.6087e-02, PNorm = 139.4014, GNorm = 0.6625, lr_0 = 6.2876e-04
Loss = 2.7635e-02, PNorm = 139.4606, GNorm = 0.2572, lr_0 = 6.2833e-04
Loss = 2.4784e-02, PNorm = 139.5170, GNorm = 0.6870, lr_0 = 6.2789e-04
Loss = 2.6381e-02, PNorm = 139.5727, GNorm = 0.2828, lr_0 = 6.2746e-04
Loss = 2.5693e-02, PNorm = 139.6253, GNorm = 0.2732, lr_0 = 6.2703e-04
Loss = 2.7277e-02, PNorm = 139.6844, GNorm = 0.4524, lr_0 = 6.2661e-04
Loss = 2.6913e-02, PNorm = 139.7426, GNorm = 0.5200, lr_0 = 6.2618e-04
Loss = 2.6177e-02, PNorm = 139.7938, GNorm = 0.2093, lr_0 = 6.2575e-04
Loss = 2.5757e-02, PNorm = 139.8469, GNorm = 0.3057, lr_0 = 6.2532e-04
Loss = 2.6488e-02, PNorm = 139.9021, GNorm = 0.3616, lr_0 = 6.2489e-04
Loss = 2.9161e-02, PNorm = 139.9675, GNorm = 0.5404, lr_0 = 6.2446e-04
Loss = 2.6349e-02, PNorm = 140.0227, GNorm = 0.6216, lr_0 = 6.2403e-04
Loss = 2.8896e-02, PNorm = 140.0747, GNorm = 0.2875, lr_0 = 6.2361e-04
Loss = 2.7595e-02, PNorm = 140.1339, GNorm = 0.6524, lr_0 = 6.2318e-04
Loss = 2.4834e-02, PNorm = 140.1891, GNorm = 0.2966, lr_0 = 6.2275e-04
Loss = 2.8830e-02, PNorm = 140.2463, GNorm = 0.2800, lr_0 = 6.2233e-04
Loss = 2.3323e-02, PNorm = 140.2991, GNorm = 0.1710, lr_0 = 6.2190e-04
Loss = 2.4714e-02, PNorm = 140.3498, GNorm = 0.3820, lr_0 = 6.2147e-04
Loss = 2.5027e-02, PNorm = 140.4047, GNorm = 0.9397, lr_0 = 6.2105e-04
Loss = 2.6209e-02, PNorm = 140.4582, GNorm = 0.4184, lr_0 = 6.2062e-04
Loss = 3.0970e-02, PNorm = 140.5094, GNorm = 0.4858, lr_0 = 6.2020e-04
Loss = 3.0938e-02, PNorm = 140.5693, GNorm = 0.8437, lr_0 = 6.1977e-04
Loss = 2.7918e-02, PNorm = 140.6332, GNorm = 0.3100, lr_0 = 6.1935e-04
Loss = 2.7752e-02, PNorm = 140.6931, GNorm = 0.3610, lr_0 = 6.1892e-04
Loss = 2.8045e-02, PNorm = 140.7540, GNorm = 0.6478, lr_0 = 6.1850e-04
Loss = 3.2208e-02, PNorm = 140.8105, GNorm = 0.3742, lr_0 = 6.1808e-04
Loss = 2.7713e-02, PNorm = 140.8708, GNorm = 0.4401, lr_0 = 6.1765e-04
Loss = 3.0602e-02, PNorm = 140.9277, GNorm = 0.2350, lr_0 = 6.1723e-04
Loss = 2.3883e-02, PNorm = 140.9822, GNorm = 0.4475, lr_0 = 6.1681e-04
Loss = 2.4830e-02, PNorm = 141.0340, GNorm = 0.5824, lr_0 = 6.1638e-04
Loss = 2.9940e-02, PNorm = 141.0897, GNorm = 0.4830, lr_0 = 6.1596e-04
Loss = 2.5008e-02, PNorm = 141.1419, GNorm = 0.4321, lr_0 = 6.1554e-04
Loss = 3.1409e-02, PNorm = 141.2043, GNorm = 0.5061, lr_0 = 6.1512e-04
Loss = 3.1836e-02, PNorm = 141.2659, GNorm = 0.6869, lr_0 = 6.1470e-04
Loss = 2.6713e-02, PNorm = 141.3295, GNorm = 0.4834, lr_0 = 6.1428e-04
Loss = 2.8231e-02, PNorm = 141.3909, GNorm = 0.4703, lr_0 = 6.1385e-04
Loss = 3.2454e-02, PNorm = 141.4523, GNorm = 0.5024, lr_0 = 6.1343e-04
Loss = 2.9058e-02, PNorm = 141.5122, GNorm = 0.5719, lr_0 = 6.1301e-04
Loss = 2.6259e-02, PNorm = 141.5732, GNorm = 0.2546, lr_0 = 6.1259e-04
Loss = 3.4426e-02, PNorm = 141.6305, GNorm = 0.7038, lr_0 = 6.1217e-04
Loss = 2.5861e-02, PNorm = 141.6912, GNorm = 0.1845, lr_0 = 6.1175e-04
Loss = 2.7557e-02, PNorm = 141.7500, GNorm = 0.3103, lr_0 = 6.1134e-04
Loss = 3.0603e-02, PNorm = 141.8015, GNorm = 0.4254, lr_0 = 6.1092e-04
Loss = 2.7505e-02, PNorm = 141.8563, GNorm = 0.4258, lr_0 = 6.1050e-04
Validation mae = 0.284751
Epoch 8
Loss = 2.5774e-02, PNorm = 141.9045, GNorm = 0.1756, lr_0 = 6.1008e-04
Loss = 2.2825e-02, PNorm = 141.9481, GNorm = 0.3408, lr_0 = 6.0966e-04
Loss = 2.8312e-02, PNorm = 141.9853, GNorm = 0.3338, lr_0 = 6.0924e-04
Loss = 2.0538e-02, PNorm = 142.0285, GNorm = 0.2847, lr_0 = 6.0883e-04
Loss = 2.1769e-02, PNorm = 142.0699, GNorm = 0.3648, lr_0 = 6.0841e-04
Loss = 2.0775e-02, PNorm = 142.1125, GNorm = 0.6720, lr_0 = 6.0799e-04
Loss = 2.4308e-02, PNorm = 142.1503, GNorm = 0.9345, lr_0 = 6.0758e-04
Loss = 2.3542e-02, PNorm = 142.1845, GNorm = 0.3631, lr_0 = 6.0716e-04
Loss = 1.8392e-02, PNorm = 142.2280, GNorm = 0.4403, lr_0 = 6.0674e-04
Loss = 2.0176e-02, PNorm = 142.2634, GNorm = 0.5019, lr_0 = 6.0633e-04
Loss = 2.5661e-02, PNorm = 142.2989, GNorm = 0.4337, lr_0 = 6.0591e-04
Loss = 2.0505e-02, PNorm = 142.3359, GNorm = 0.2781, lr_0 = 6.0550e-04
Loss = 2.3164e-02, PNorm = 142.3771, GNorm = 0.5737, lr_0 = 6.0508e-04
Loss = 1.8005e-02, PNorm = 142.4167, GNorm = 0.1461, lr_0 = 6.0467e-04
Loss = 2.4827e-02, PNorm = 142.4596, GNorm = 0.6858, lr_0 = 6.0425e-04
Loss = 2.3070e-02, PNorm = 142.4939, GNorm = 0.3969, lr_0 = 6.0384e-04
Loss = 2.6217e-02, PNorm = 142.5336, GNorm = 0.4153, lr_0 = 6.0343e-04
Loss = 2.6554e-02, PNorm = 142.5732, GNorm = 0.6029, lr_0 = 6.0301e-04
Loss = 1.9908e-02, PNorm = 142.6128, GNorm = 1.0057, lr_0 = 6.0260e-04
Loss = 2.1868e-02, PNorm = 142.6567, GNorm = 0.2654, lr_0 = 6.0219e-04
Loss = 2.0524e-02, PNorm = 142.6947, GNorm = 0.6616, lr_0 = 6.0178e-04
Loss = 2.2894e-02, PNorm = 142.7376, GNorm = 0.3456, lr_0 = 6.0136e-04
Loss = 1.9739e-02, PNorm = 142.7774, GNorm = 0.4606, lr_0 = 6.0095e-04
Loss = 2.0261e-02, PNorm = 142.8248, GNorm = 0.4198, lr_0 = 6.0054e-04
Loss = 2.3411e-02, PNorm = 142.8658, GNorm = 0.3645, lr_0 = 6.0013e-04
Loss = 2.2121e-02, PNorm = 142.9075, GNorm = 0.4894, lr_0 = 5.9972e-04
Loss = 1.9271e-02, PNorm = 142.9472, GNorm = 0.4092, lr_0 = 5.9931e-04
Loss = 2.4065e-02, PNorm = 142.9887, GNorm = 0.7064, lr_0 = 5.9890e-04
Loss = 2.1324e-02, PNorm = 143.0313, GNorm = 0.5546, lr_0 = 5.9849e-04
Loss = 2.0082e-02, PNorm = 143.0768, GNorm = 0.3615, lr_0 = 5.9808e-04
Loss = 1.9615e-02, PNorm = 143.1169, GNorm = 0.2200, lr_0 = 5.9767e-04
Loss = 2.0785e-02, PNorm = 143.1580, GNorm = 0.2744, lr_0 = 5.9726e-04
Loss = 1.7726e-02, PNorm = 143.1960, GNorm = 0.2247, lr_0 = 5.9685e-04
Loss = 1.9579e-02, PNorm = 143.2359, GNorm = 0.3330, lr_0 = 5.9644e-04
Loss = 2.1233e-02, PNorm = 143.2768, GNorm = 0.3848, lr_0 = 5.9603e-04
Loss = 1.7716e-02, PNorm = 143.3171, GNorm = 0.4139, lr_0 = 5.9562e-04
Loss = 2.4903e-02, PNorm = 143.3572, GNorm = 0.3641, lr_0 = 5.9521e-04
Loss = 2.0050e-02, PNorm = 143.4035, GNorm = 0.9395, lr_0 = 5.9481e-04
Loss = 1.9642e-02, PNorm = 143.4466, GNorm = 0.2657, lr_0 = 5.9440e-04
Loss = 2.2821e-02, PNorm = 143.4974, GNorm = 0.1869, lr_0 = 5.9399e-04
Loss = 2.1645e-02, PNorm = 143.5368, GNorm = 0.3398, lr_0 = 5.9358e-04
Loss = 2.2250e-02, PNorm = 143.5815, GNorm = 0.6082, lr_0 = 5.9318e-04
Loss = 2.0182e-02, PNorm = 143.6217, GNorm = 0.6293, lr_0 = 5.9277e-04
Loss = 2.1344e-02, PNorm = 143.6710, GNorm = 0.4249, lr_0 = 5.9236e-04
Loss = 2.3221e-02, PNorm = 143.7117, GNorm = 1.0599, lr_0 = 5.9196e-04
Loss = 2.2606e-02, PNorm = 143.7595, GNorm = 0.3707, lr_0 = 5.9155e-04
Loss = 1.7982e-02, PNorm = 143.8003, GNorm = 0.7123, lr_0 = 5.9115e-04
Loss = 2.1824e-02, PNorm = 143.8413, GNorm = 0.3976, lr_0 = 5.9074e-04
Loss = 2.3589e-02, PNorm = 143.8813, GNorm = 0.2599, lr_0 = 5.9034e-04
Loss = 2.3078e-02, PNorm = 143.9227, GNorm = 0.5165, lr_0 = 5.8993e-04
Loss = 1.9167e-02, PNorm = 143.9629, GNorm = 0.1935, lr_0 = 5.8953e-04
Loss = 1.8480e-02, PNorm = 144.0069, GNorm = 0.3614, lr_0 = 5.8913e-04
Loss = 2.0787e-02, PNorm = 144.0434, GNorm = 0.7270, lr_0 = 5.8872e-04
Loss = 2.1676e-02, PNorm = 144.0879, GNorm = 0.6726, lr_0 = 5.8832e-04
Loss = 2.1368e-02, PNorm = 144.1307, GNorm = 0.2469, lr_0 = 5.8792e-04
Loss = 2.2209e-02, PNorm = 144.1781, GNorm = 0.3126, lr_0 = 5.8751e-04
Loss = 2.5859e-02, PNorm = 144.2260, GNorm = 0.2456, lr_0 = 5.8711e-04
Loss = 2.0773e-02, PNorm = 144.2735, GNorm = 0.5592, lr_0 = 5.8671e-04
Loss = 3.0186e-02, PNorm = 144.3199, GNorm = 0.1568, lr_0 = 5.8631e-04
Loss = 2.0695e-02, PNorm = 144.3665, GNorm = 0.3119, lr_0 = 5.8591e-04
Loss = 2.0753e-02, PNorm = 144.4079, GNorm = 0.6804, lr_0 = 5.8550e-04
Loss = 2.0360e-02, PNorm = 144.4510, GNorm = 0.6834, lr_0 = 5.8510e-04
Loss = 2.3329e-02, PNorm = 144.4985, GNorm = 0.2458, lr_0 = 5.8470e-04
Loss = 2.2792e-02, PNorm = 144.5455, GNorm = 0.5061, lr_0 = 5.8430e-04
Loss = 2.1141e-02, PNorm = 144.5942, GNorm = 0.2512, lr_0 = 5.8390e-04
Loss = 2.3032e-02, PNorm = 144.6420, GNorm = 0.5335, lr_0 = 5.8350e-04
Loss = 1.7386e-02, PNorm = 144.6885, GNorm = 0.4669, lr_0 = 5.8310e-04
Loss = 1.9652e-02, PNorm = 144.7301, GNorm = 0.3632, lr_0 = 5.8270e-04
Loss = 2.3831e-02, PNorm = 144.7726, GNorm = 0.4528, lr_0 = 5.8230e-04
Loss = 2.0620e-02, PNorm = 144.8205, GNorm = 0.4900, lr_0 = 5.8190e-04
Loss = 2.0887e-02, PNorm = 144.8672, GNorm = 0.1643, lr_0 = 5.8151e-04
Loss = 1.9625e-02, PNorm = 144.9159, GNorm = 0.5499, lr_0 = 5.8111e-04
Loss = 2.2589e-02, PNorm = 144.9667, GNorm = 0.3428, lr_0 = 5.8071e-04
Loss = 2.2334e-02, PNorm = 145.0159, GNorm = 0.2516, lr_0 = 5.8031e-04
Loss = 2.6981e-02, PNorm = 145.0648, GNorm = 0.3224, lr_0 = 5.7991e-04
Loss = 2.0733e-02, PNorm = 145.1133, GNorm = 0.3801, lr_0 = 5.7952e-04
Loss = 2.3253e-02, PNorm = 145.1624, GNorm = 0.4668, lr_0 = 5.7912e-04
Loss = 2.4192e-02, PNorm = 145.2065, GNorm = 0.9672, lr_0 = 5.7872e-04
Loss = 1.9526e-02, PNorm = 145.2543, GNorm = 0.2460, lr_0 = 5.7833e-04
Loss = 1.8679e-02, PNorm = 145.2987, GNorm = 0.2368, lr_0 = 5.7793e-04
Loss = 2.2530e-02, PNorm = 145.3385, GNorm = 0.2813, lr_0 = 5.7753e-04
Loss = 2.0070e-02, PNorm = 145.3759, GNorm = 0.4198, lr_0 = 5.7714e-04
Loss = 2.0555e-02, PNorm = 145.4151, GNorm = 0.3976, lr_0 = 5.7674e-04
Loss = 1.8978e-02, PNorm = 145.4589, GNorm = 0.2246, lr_0 = 5.7635e-04
Loss = 2.0229e-02, PNorm = 145.5031, GNorm = 0.7667, lr_0 = 5.7595e-04
Loss = 2.2840e-02, PNorm = 145.5419, GNorm = 0.3128, lr_0 = 5.7556e-04
Loss = 1.9397e-02, PNorm = 145.5915, GNorm = 0.1952, lr_0 = 5.7516e-04
Loss = 2.3264e-02, PNorm = 145.6399, GNorm = 0.2080, lr_0 = 5.7477e-04
Loss = 2.3205e-02, PNorm = 145.6837, GNorm = 1.0219, lr_0 = 5.7438e-04
Loss = 2.3198e-02, PNorm = 145.7309, GNorm = 0.3479, lr_0 = 5.7398e-04
Loss = 1.9139e-02, PNorm = 145.7824, GNorm = 0.2977, lr_0 = 5.7359e-04
Loss = 2.1638e-02, PNorm = 145.8287, GNorm = 0.9335, lr_0 = 5.7320e-04
Loss = 2.1169e-02, PNorm = 145.8819, GNorm = 0.4331, lr_0 = 5.7280e-04
Loss = 2.3323e-02, PNorm = 145.9348, GNorm = 0.6027, lr_0 = 5.7241e-04
Loss = 2.1510e-02, PNorm = 145.9853, GNorm = 0.8253, lr_0 = 5.7202e-04
Loss = 1.9981e-02, PNorm = 146.0346, GNorm = 0.2364, lr_0 = 5.7163e-04
Loss = 2.4754e-02, PNorm = 146.0810, GNorm = 0.2519, lr_0 = 5.7124e-04
Loss = 2.1764e-02, PNorm = 146.1279, GNorm = 0.3134, lr_0 = 5.7084e-04
Loss = 1.9201e-02, PNorm = 146.1766, GNorm = 0.4711, lr_0 = 5.7045e-04
Loss = 2.4035e-02, PNorm = 146.2284, GNorm = 0.4518, lr_0 = 5.7006e-04
Loss = 2.4001e-02, PNorm = 146.2744, GNorm = 0.5647, lr_0 = 5.6967e-04
Loss = 2.5695e-02, PNorm = 146.3306, GNorm = 0.3815, lr_0 = 5.6928e-04
Loss = 2.6467e-02, PNorm = 146.3854, GNorm = 0.1897, lr_0 = 5.6889e-04
Loss = 2.1980e-02, PNorm = 146.4453, GNorm = 0.5387, lr_0 = 5.6850e-04
Loss = 2.3671e-02, PNorm = 146.4935, GNorm = 0.3678, lr_0 = 5.6811e-04
Loss = 2.0683e-02, PNorm = 146.5462, GNorm = 0.5607, lr_0 = 5.6772e-04
Loss = 2.8235e-02, PNorm = 146.5912, GNorm = 0.5927, lr_0 = 5.6733e-04
Loss = 2.1264e-02, PNorm = 146.6476, GNorm = 0.2650, lr_0 = 5.6695e-04
Loss = 2.8856e-02, PNorm = 146.6969, GNorm = 0.7992, lr_0 = 5.6656e-04
Loss = 2.4595e-02, PNorm = 146.7607, GNorm = 1.0143, lr_0 = 5.6617e-04
Loss = 2.2461e-02, PNorm = 146.8087, GNorm = 0.4253, lr_0 = 5.6578e-04
Loss = 2.3783e-02, PNorm = 146.8604, GNorm = 0.5191, lr_0 = 5.6539e-04
Loss = 2.4511e-02, PNorm = 146.9150, GNorm = 0.3145, lr_0 = 5.6501e-04
Loss = 2.3224e-02, PNorm = 146.9651, GNorm = 0.2156, lr_0 = 5.6462e-04
Loss = 1.7449e-02, PNorm = 147.0133, GNorm = 0.3026, lr_0 = 5.6423e-04
Loss = 2.3294e-02, PNorm = 147.0562, GNorm = 0.4527, lr_0 = 5.6385e-04
Loss = 2.3821e-02, PNorm = 147.1034, GNorm = 0.5008, lr_0 = 5.6346e-04
Loss = 2.4293e-02, PNorm = 147.1520, GNorm = 0.4569, lr_0 = 5.6307e-04
Loss = 2.0109e-02, PNorm = 147.2063, GNorm = 0.2747, lr_0 = 5.6269e-04
Loss = 2.1901e-02, PNorm = 147.2561, GNorm = 0.3838, lr_0 = 5.6230e-04
Validation mae = 0.283661
Epoch 9
Loss = 2.0523e-02, PNorm = 147.2964, GNorm = 0.4042, lr_0 = 5.6192e-04
Loss = 2.3492e-02, PNorm = 147.3326, GNorm = 0.3076, lr_0 = 5.6153e-04
Loss = 2.3499e-02, PNorm = 147.3732, GNorm = 0.5601, lr_0 = 5.6115e-04
Loss = 2.2250e-02, PNorm = 147.4140, GNorm = 0.5898, lr_0 = 5.6076e-04
Loss = 1.9786e-02, PNorm = 147.4545, GNorm = 0.3019, lr_0 = 5.6038e-04
Loss = 1.9424e-02, PNorm = 147.4956, GNorm = 0.5145, lr_0 = 5.6000e-04
Loss = 1.8489e-02, PNorm = 147.5291, GNorm = 0.7746, lr_0 = 5.5961e-04
Loss = 1.9172e-02, PNorm = 147.5687, GNorm = 0.3007, lr_0 = 5.5923e-04
Loss = 1.8970e-02, PNorm = 147.6055, GNorm = 0.3253, lr_0 = 5.5885e-04
Loss = 2.2524e-02, PNorm = 147.6446, GNorm = 0.1929, lr_0 = 5.5846e-04
Loss = 1.7042e-02, PNorm = 147.6842, GNorm = 0.1677, lr_0 = 5.5808e-04
Loss = 1.9931e-02, PNorm = 147.7158, GNorm = 0.4084, lr_0 = 5.5770e-04
Loss = 1.6038e-02, PNorm = 147.7488, GNorm = 0.6357, lr_0 = 5.5732e-04
Loss = 1.9140e-02, PNorm = 147.7782, GNorm = 0.2863, lr_0 = 5.5693e-04
Loss = 2.1849e-02, PNorm = 147.8062, GNorm = 0.6075, lr_0 = 5.5655e-04
Loss = 1.8608e-02, PNorm = 147.8420, GNorm = 0.2740, lr_0 = 5.5617e-04
Loss = 1.7681e-02, PNorm = 147.8766, GNorm = 0.2746, lr_0 = 5.5579e-04
Loss = 1.5625e-02, PNorm = 147.9082, GNorm = 0.2372, lr_0 = 5.5541e-04
Loss = 2.0077e-02, PNorm = 147.9429, GNorm = 0.9642, lr_0 = 5.5503e-04
Loss = 2.0379e-02, PNorm = 147.9800, GNorm = 0.8798, lr_0 = 5.5465e-04
Loss = 2.0328e-02, PNorm = 148.0134, GNorm = 0.5014, lr_0 = 5.5427e-04
Loss = 1.6881e-02, PNorm = 148.0458, GNorm = 0.2872, lr_0 = 5.5389e-04
Loss = 1.9388e-02, PNorm = 148.0747, GNorm = 0.3101, lr_0 = 5.5351e-04
Loss = 1.8419e-02, PNorm = 148.1095, GNorm = 0.3192, lr_0 = 5.5313e-04
Loss = 1.5913e-02, PNorm = 148.1446, GNorm = 0.2613, lr_0 = 5.5275e-04
Loss = 1.5311e-02, PNorm = 148.1791, GNorm = 0.6603, lr_0 = 5.5237e-04
Loss = 1.6334e-02, PNorm = 148.2105, GNorm = 0.2285, lr_0 = 5.5199e-04
Loss = 1.4367e-02, PNorm = 148.2408, GNorm = 0.4038, lr_0 = 5.5162e-04
Loss = 1.5951e-02, PNorm = 148.2757, GNorm = 0.8705, lr_0 = 5.5124e-04
Loss = 1.7314e-02, PNorm = 148.3045, GNorm = 0.4523, lr_0 = 5.5086e-04
Loss = 1.8868e-02, PNorm = 148.3380, GNorm = 0.2581, lr_0 = 5.5048e-04
Loss = 1.8113e-02, PNorm = 148.3680, GNorm = 0.4699, lr_0 = 5.5011e-04
Loss = 1.8693e-02, PNorm = 148.4031, GNorm = 0.6124, lr_0 = 5.4973e-04
Loss = 2.0226e-02, PNorm = 148.4364, GNorm = 0.1896, lr_0 = 5.4935e-04
Loss = 1.8492e-02, PNorm = 148.4716, GNorm = 0.5269, lr_0 = 5.4898e-04
Loss = 2.1237e-02, PNorm = 148.5086, GNorm = 0.9696, lr_0 = 5.4860e-04
Loss = 1.7654e-02, PNorm = 148.5472, GNorm = 0.2746, lr_0 = 5.4822e-04
Loss = 1.6876e-02, PNorm = 148.5886, GNorm = 0.2830, lr_0 = 5.4785e-04
Loss = 2.0097e-02, PNorm = 148.6255, GNorm = 0.3764, lr_0 = 5.4747e-04
Loss = 1.6073e-02, PNorm = 148.6616, GNorm = 0.4172, lr_0 = 5.4710e-04
Loss = 1.7243e-02, PNorm = 148.6957, GNorm = 0.6295, lr_0 = 5.4672e-04
Loss = 1.9630e-02, PNorm = 148.7304, GNorm = 0.4540, lr_0 = 5.4635e-04
Loss = 1.7056e-02, PNorm = 148.7660, GNorm = 0.1660, lr_0 = 5.4597e-04
Loss = 1.8731e-02, PNorm = 148.8076, GNorm = 0.5198, lr_0 = 5.4560e-04
Loss = 1.5374e-02, PNorm = 148.8486, GNorm = 0.1924, lr_0 = 5.4523e-04
Loss = 1.5290e-02, PNorm = 148.8846, GNorm = 0.5909, lr_0 = 5.4485e-04
Loss = 1.5095e-02, PNorm = 148.9157, GNorm = 0.2172, lr_0 = 5.4448e-04
Loss = 1.6439e-02, PNorm = 148.9526, GNorm = 0.2290, lr_0 = 5.4411e-04
Loss = 1.6047e-02, PNorm = 148.9859, GNorm = 0.2356, lr_0 = 5.4373e-04
Loss = 1.4503e-02, PNorm = 149.0159, GNorm = 0.1897, lr_0 = 5.4336e-04
Loss = 1.9885e-02, PNorm = 149.0479, GNorm = 0.3306, lr_0 = 5.4299e-04
Loss = 1.5888e-02, PNorm = 149.0858, GNorm = 0.3165, lr_0 = 5.4262e-04
Loss = 1.8777e-02, PNorm = 149.1208, GNorm = 0.3397, lr_0 = 5.4225e-04
Loss = 1.9691e-02, PNorm = 149.1613, GNorm = 0.4003, lr_0 = 5.4187e-04
Loss = 2.2452e-02, PNorm = 149.2004, GNorm = 0.7791, lr_0 = 5.4150e-04
Loss = 1.6642e-02, PNorm = 149.2452, GNorm = 0.3493, lr_0 = 5.4113e-04
Loss = 1.7595e-02, PNorm = 149.2854, GNorm = 0.2690, lr_0 = 5.4076e-04
Loss = 1.7737e-02, PNorm = 149.3264, GNorm = 0.1101, lr_0 = 5.4039e-04
Loss = 1.8832e-02, PNorm = 149.3638, GNorm = 0.3026, lr_0 = 5.4002e-04
Loss = 1.8094e-02, PNorm = 149.3983, GNorm = 0.2519, lr_0 = 5.3965e-04
Loss = 1.6729e-02, PNorm = 149.4352, GNorm = 0.4499, lr_0 = 5.3928e-04
Loss = 1.7264e-02, PNorm = 149.4762, GNorm = 0.1730, lr_0 = 5.3891e-04
Loss = 1.6520e-02, PNorm = 149.5216, GNorm = 0.1892, lr_0 = 5.3854e-04
Loss = 1.7016e-02, PNorm = 149.5609, GNorm = 0.1598, lr_0 = 5.3817e-04
Loss = 1.9752e-02, PNorm = 149.5993, GNorm = 0.1738, lr_0 = 5.3781e-04
Loss = 1.4188e-02, PNorm = 149.6385, GNorm = 0.1646, lr_0 = 5.3744e-04
Loss = 1.6419e-02, PNorm = 149.6787, GNorm = 0.2749, lr_0 = 5.3707e-04
Loss = 1.9435e-02, PNorm = 149.7153, GNorm = 0.1698, lr_0 = 5.3670e-04
Loss = 1.3890e-02, PNorm = 149.7506, GNorm = 0.3124, lr_0 = 5.3633e-04
Loss = 1.6671e-02, PNorm = 149.7837, GNorm = 0.2343, lr_0 = 5.3597e-04
Loss = 1.8897e-02, PNorm = 149.8211, GNorm = 0.1719, lr_0 = 5.3560e-04
Loss = 1.5458e-02, PNorm = 149.8604, GNorm = 0.3537, lr_0 = 5.3523e-04
Loss = 1.6276e-02, PNorm = 149.9038, GNorm = 0.1748, lr_0 = 5.3486e-04
Loss = 2.0458e-02, PNorm = 149.9380, GNorm = 0.1837, lr_0 = 5.3450e-04
Loss = 1.5416e-02, PNorm = 149.9816, GNorm = 0.2727, lr_0 = 5.3413e-04
Loss = 1.7138e-02, PNorm = 150.0178, GNorm = 0.2419, lr_0 = 5.3377e-04
Loss = 1.9428e-02, PNorm = 150.0583, GNorm = 0.2248, lr_0 = 5.3340e-04
Loss = 1.8415e-02, PNorm = 150.1012, GNorm = 0.5436, lr_0 = 5.3304e-04
Loss = 1.4616e-02, PNorm = 150.1414, GNorm = 0.4072, lr_0 = 5.3267e-04
Loss = 1.8029e-02, PNorm = 150.1802, GNorm = 0.2742, lr_0 = 5.3231e-04
Loss = 1.5700e-02, PNorm = 150.2205, GNorm = 0.2738, lr_0 = 5.3194e-04
Loss = 1.5048e-02, PNorm = 150.2568, GNorm = 0.5832, lr_0 = 5.3158e-04
Loss = 1.8202e-02, PNorm = 150.2937, GNorm = 0.3928, lr_0 = 5.3121e-04
Loss = 2.1277e-02, PNorm = 150.3310, GNorm = 0.3110, lr_0 = 5.3085e-04
Loss = 1.6758e-02, PNorm = 150.3723, GNorm = 0.1381, lr_0 = 5.3048e-04
Loss = 1.7405e-02, PNorm = 150.4151, GNorm = 0.2324, lr_0 = 5.3012e-04
Loss = 1.4839e-02, PNorm = 150.4544, GNorm = 0.3087, lr_0 = 5.2976e-04
Loss = 2.0366e-02, PNorm = 150.4905, GNorm = 0.2644, lr_0 = 5.2939e-04
Loss = 1.6115e-02, PNorm = 150.5249, GNorm = 0.2457, lr_0 = 5.2903e-04
Loss = 1.6434e-02, PNorm = 150.5597, GNorm = 0.2670, lr_0 = 5.2867e-04
Loss = 1.5819e-02, PNorm = 150.5948, GNorm = 0.2286, lr_0 = 5.2831e-04
Loss = 1.5166e-02, PNorm = 150.6315, GNorm = 0.2902, lr_0 = 5.2795e-04
Loss = 1.6733e-02, PNorm = 150.6665, GNorm = 0.5322, lr_0 = 5.2758e-04
Loss = 1.6054e-02, PNorm = 150.7034, GNorm = 0.2939, lr_0 = 5.2722e-04
Loss = 1.7587e-02, PNorm = 150.7454, GNorm = 0.4600, lr_0 = 5.2686e-04
Loss = 1.9656e-02, PNorm = 150.7859, GNorm = 0.3071, lr_0 = 5.2650e-04
Loss = 1.9005e-02, PNorm = 150.8321, GNorm = 0.5162, lr_0 = 5.2614e-04
Loss = 1.7315e-02, PNorm = 150.8729, GNorm = 0.5725, lr_0 = 5.2578e-04
Loss = 2.0708e-02, PNorm = 150.9152, GNorm = 0.3307, lr_0 = 5.2542e-04
Loss = 1.5338e-02, PNorm = 150.9632, GNorm = 0.2411, lr_0 = 5.2506e-04
Loss = 1.9073e-02, PNorm = 151.0101, GNorm = 0.8654, lr_0 = 5.2470e-04
Loss = 2.0126e-02, PNorm = 151.0544, GNorm = 0.3764, lr_0 = 5.2434e-04
Loss = 1.7951e-02, PNorm = 151.1039, GNorm = 0.1531, lr_0 = 5.2398e-04
Loss = 1.8074e-02, PNorm = 151.1437, GNorm = 0.5667, lr_0 = 5.2362e-04
Loss = 1.7519e-02, PNorm = 151.1885, GNorm = 0.4452, lr_0 = 5.2326e-04
Loss = 2.4990e-02, PNorm = 151.2326, GNorm = 0.4607, lr_0 = 5.2290e-04
Loss = 1.8740e-02, PNorm = 151.2719, GNorm = 1.2278, lr_0 = 5.2255e-04
Loss = 1.9422e-02, PNorm = 151.3115, GNorm = 0.5719, lr_0 = 5.2219e-04
Loss = 1.8796e-02, PNorm = 151.3538, GNorm = 0.1773, lr_0 = 5.2183e-04
Loss = 2.0044e-02, PNorm = 151.3995, GNorm = 0.5254, lr_0 = 5.2147e-04
Loss = 1.7790e-02, PNorm = 151.4497, GNorm = 0.2350, lr_0 = 5.2112e-04
Loss = 1.6767e-02, PNorm = 151.4920, GNorm = 0.5998, lr_0 = 5.2076e-04
Loss = 2.0306e-02, PNorm = 151.5352, GNorm = 0.2429, lr_0 = 5.2040e-04
Loss = 1.7595e-02, PNorm = 151.5725, GNorm = 0.3045, lr_0 = 5.2005e-04
Loss = 1.7340e-02, PNorm = 151.6126, GNorm = 0.2270, lr_0 = 5.1969e-04
Loss = 1.6309e-02, PNorm = 151.6543, GNorm = 0.4661, lr_0 = 5.1933e-04
Loss = 1.9342e-02, PNorm = 151.6974, GNorm = 0.1973, lr_0 = 5.1898e-04
Loss = 1.9633e-02, PNorm = 151.7426, GNorm = 0.9693, lr_0 = 5.1862e-04
Loss = 1.7600e-02, PNorm = 151.7844, GNorm = 0.2800, lr_0 = 5.1827e-04
Loss = 1.6111e-02, PNorm = 151.8274, GNorm = 0.1843, lr_0 = 5.1791e-04
Validation mae = 0.283161
Epoch 10
Loss = 1.6052e-02, PNorm = 151.8666, GNorm = 0.4949, lr_0 = 5.1756e-04
Loss = 1.3089e-02, PNorm = 151.9005, GNorm = 0.3569, lr_0 = 5.1720e-04
Loss = 1.6737e-02, PNorm = 151.9347, GNorm = 0.2941, lr_0 = 5.1685e-04
Loss = 1.5516e-02, PNorm = 151.9631, GNorm = 0.2523, lr_0 = 5.1649e-04
Loss = 1.8348e-02, PNorm = 151.9912, GNorm = 0.8858, lr_0 = 5.1614e-04
Loss = 1.3276e-02, PNorm = 152.0172, GNorm = 0.3358, lr_0 = 5.1579e-04
Loss = 1.6790e-02, PNorm = 152.0425, GNorm = 0.3040, lr_0 = 5.1543e-04
Loss = 1.5645e-02, PNorm = 152.0694, GNorm = 0.4233, lr_0 = 5.1508e-04
Loss = 1.3424e-02, PNorm = 152.1000, GNorm = 0.3223, lr_0 = 5.1473e-04
Loss = 1.3684e-02, PNorm = 152.1278, GNorm = 0.3413, lr_0 = 5.1437e-04
Loss = 1.3738e-02, PNorm = 152.1585, GNorm = 0.4336, lr_0 = 5.1402e-04
Loss = 1.6175e-02, PNorm = 152.1778, GNorm = 0.8828, lr_0 = 5.1367e-04
Loss = 1.3742e-02, PNorm = 152.2071, GNorm = 0.1284, lr_0 = 5.1332e-04
Loss = 1.5051e-02, PNorm = 152.2355, GNorm = 0.1847, lr_0 = 5.1297e-04
Loss = 1.6126e-02, PNorm = 152.2657, GNorm = 0.4024, lr_0 = 5.1262e-04
Loss = 1.2629e-02, PNorm = 152.2943, GNorm = 0.1362, lr_0 = 5.1226e-04
Loss = 1.4795e-02, PNorm = 152.3266, GNorm = 0.5839, lr_0 = 5.1191e-04
Loss = 1.7317e-02, PNorm = 152.3589, GNorm = 0.7217, lr_0 = 5.1156e-04
Loss = 1.4035e-02, PNorm = 152.3896, GNorm = 0.2555, lr_0 = 5.1121e-04
Loss = 1.2323e-02, PNorm = 152.4201, GNorm = 0.1760, lr_0 = 5.1086e-04
Loss = 1.3869e-02, PNorm = 152.4499, GNorm = 0.1995, lr_0 = 5.1051e-04
Loss = 1.7384e-02, PNorm = 152.4791, GNorm = 0.2723, lr_0 = 5.1016e-04
Loss = 1.5739e-02, PNorm = 152.5120, GNorm = 0.3079, lr_0 = 5.0981e-04
Loss = 1.5117e-02, PNorm = 152.5439, GNorm = 0.2679, lr_0 = 5.0946e-04
Loss = 1.2261e-02, PNorm = 152.5740, GNorm = 0.5097, lr_0 = 5.0911e-04
Loss = 1.4761e-02, PNorm = 152.6033, GNorm = 0.2797, lr_0 = 5.0877e-04
Loss = 1.4525e-02, PNorm = 152.6272, GNorm = 0.5807, lr_0 = 5.0842e-04
Loss = 1.3136e-02, PNorm = 152.6583, GNorm = 0.4846, lr_0 = 5.0807e-04
Loss = 1.4294e-02, PNorm = 152.6871, GNorm = 0.1280, lr_0 = 5.0772e-04
Loss = 1.3655e-02, PNorm = 152.7136, GNorm = 0.1181, lr_0 = 5.0737e-04
Loss = 1.3830e-02, PNorm = 152.7408, GNorm = 0.1506, lr_0 = 5.0703e-04
Loss = 1.6615e-02, PNorm = 152.7688, GNorm = 0.1932, lr_0 = 5.0668e-04
Loss = 1.3637e-02, PNorm = 152.7989, GNorm = 0.5732, lr_0 = 5.0633e-04
Loss = 1.3866e-02, PNorm = 152.8304, GNorm = 0.3449, lr_0 = 5.0598e-04
Loss = 1.3766e-02, PNorm = 152.8592, GNorm = 0.1372, lr_0 = 5.0564e-04
Loss = 1.8211e-02, PNorm = 152.8874, GNorm = 0.3709, lr_0 = 5.0529e-04
Loss = 1.3848e-02, PNorm = 152.9201, GNorm = 0.2525, lr_0 = 5.0494e-04
Loss = 1.1482e-02, PNorm = 152.9495, GNorm = 0.3804, lr_0 = 5.0460e-04
Loss = 1.5771e-02, PNorm = 152.9765, GNorm = 0.3173, lr_0 = 5.0425e-04
Loss = 1.3859e-02, PNorm = 153.0072, GNorm = 0.5031, lr_0 = 5.0391e-04
Loss = 1.3969e-02, PNorm = 153.0374, GNorm = 0.2836, lr_0 = 5.0356e-04
Loss = 1.4492e-02, PNorm = 153.0720, GNorm = 0.2160, lr_0 = 5.0322e-04
Loss = 1.2152e-02, PNorm = 153.1036, GNorm = 0.4123, lr_0 = 5.0287e-04
Loss = 1.6846e-02, PNorm = 153.1333, GNorm = 0.4211, lr_0 = 5.0253e-04
Loss = 1.3776e-02, PNorm = 153.1599, GNorm = 0.1930, lr_0 = 5.0218e-04
Loss = 1.7167e-02, PNorm = 153.1866, GNorm = 0.1973, lr_0 = 5.0184e-04
Loss = 1.3022e-02, PNorm = 153.2119, GNorm = 0.3595, lr_0 = 5.0150e-04
Loss = 1.2505e-02, PNorm = 153.2398, GNorm = 0.4523, lr_0 = 5.0115e-04
Loss = 1.2575e-02, PNorm = 153.2646, GNorm = 0.5304, lr_0 = 5.0081e-04
Loss = 1.6241e-02, PNorm = 153.2942, GNorm = 0.3872, lr_0 = 5.0047e-04
Loss = 1.2848e-02, PNorm = 153.3245, GNorm = 0.2954, lr_0 = 5.0012e-04
Loss = 1.7679e-02, PNorm = 153.3577, GNorm = 0.5404, lr_0 = 4.9978e-04
Loss = 1.5245e-02, PNorm = 153.3927, GNorm = 0.4798, lr_0 = 4.9944e-04
Loss = 1.2215e-02, PNorm = 153.4216, GNorm = 0.3734, lr_0 = 4.9910e-04
Loss = 1.4215e-02, PNorm = 153.4550, GNorm = 0.3603, lr_0 = 4.9875e-04
Loss = 1.7745e-02, PNorm = 153.4809, GNorm = 0.6497, lr_0 = 4.9841e-04
Loss = 1.6153e-02, PNorm = 153.5121, GNorm = 0.8720, lr_0 = 4.9807e-04
Loss = 1.3114e-02, PNorm = 153.5401, GNorm = 0.2278, lr_0 = 4.9773e-04
Loss = 1.2667e-02, PNorm = 153.5734, GNorm = 0.1636, lr_0 = 4.9739e-04
Loss = 1.4529e-02, PNorm = 153.6024, GNorm = 0.3169, lr_0 = 4.9705e-04
Loss = 1.5170e-02, PNorm = 153.6284, GNorm = 0.5143, lr_0 = 4.9671e-04
Loss = 1.4428e-02, PNorm = 153.6610, GNorm = 0.2103, lr_0 = 4.9637e-04
Loss = 1.8193e-02, PNorm = 153.6946, GNorm = 0.5443, lr_0 = 4.9603e-04
Loss = 1.4928e-02, PNorm = 153.7314, GNorm = 0.7469, lr_0 = 4.9569e-04
Loss = 1.6109e-02, PNorm = 153.7619, GNorm = 0.3200, lr_0 = 4.9535e-04
Loss = 1.5261e-02, PNorm = 153.7891, GNorm = 0.2068, lr_0 = 4.9501e-04
Loss = 1.3360e-02, PNorm = 153.8183, GNorm = 0.2554, lr_0 = 4.9467e-04
Loss = 1.4839e-02, PNorm = 153.8524, GNorm = 0.2865, lr_0 = 4.9433e-04
Loss = 1.3458e-02, PNorm = 153.8856, GNorm = 0.6293, lr_0 = 4.9399e-04
Loss = 1.3278e-02, PNorm = 153.9205, GNorm = 0.2648, lr_0 = 4.9365e-04
Loss = 1.5011e-02, PNorm = 153.9503, GNorm = 0.3885, lr_0 = 4.9332e-04
Loss = 1.3022e-02, PNorm = 153.9817, GNorm = 0.2637, lr_0 = 4.9298e-04
Loss = 1.4936e-02, PNorm = 154.0115, GNorm = 0.1595, lr_0 = 4.9264e-04
Loss = 1.5196e-02, PNorm = 154.0441, GNorm = 0.1502, lr_0 = 4.9230e-04
Loss = 1.2934e-02, PNorm = 154.0760, GNorm = 0.2830, lr_0 = 4.9197e-04
Loss = 1.3497e-02, PNorm = 154.1101, GNorm = 0.5002, lr_0 = 4.9163e-04
Loss = 1.3553e-02, PNorm = 154.1459, GNorm = 0.2657, lr_0 = 4.9129e-04
Loss = 1.5557e-02, PNorm = 154.1879, GNorm = 0.1106, lr_0 = 4.9095e-04
Loss = 1.4411e-02, PNorm = 154.2207, GNorm = 0.4657, lr_0 = 4.9062e-04
Loss = 1.2200e-02, PNorm = 154.2474, GNorm = 0.3073, lr_0 = 4.9028e-04
Loss = 1.2701e-02, PNorm = 154.2763, GNorm = 0.3962, lr_0 = 4.8995e-04
Loss = 1.6324e-02, PNorm = 154.3075, GNorm = 0.3104, lr_0 = 4.8961e-04
Loss = 1.2342e-02, PNorm = 154.3384, GNorm = 0.2393, lr_0 = 4.8928e-04
Loss = 1.8758e-02, PNorm = 154.3707, GNorm = 0.6146, lr_0 = 4.8894e-04
Loss = 1.2754e-02, PNorm = 154.4057, GNorm = 0.4456, lr_0 = 4.8861e-04
Loss = 1.3564e-02, PNorm = 154.4432, GNorm = 0.3177, lr_0 = 4.8827e-04
Loss = 1.3797e-02, PNorm = 154.4766, GNorm = 0.4665, lr_0 = 4.8794e-04
Loss = 1.5878e-02, PNorm = 154.5108, GNorm = 0.4096, lr_0 = 4.8760e-04
Loss = 1.3490e-02, PNorm = 154.5390, GNorm = 0.3538, lr_0 = 4.8727e-04
Loss = 1.8066e-02, PNorm = 154.5783, GNorm = 0.2475, lr_0 = 4.8693e-04
Loss = 1.5835e-02, PNorm = 154.6198, GNorm = 0.3775, lr_0 = 4.8660e-04
Loss = 1.3776e-02, PNorm = 154.6578, GNorm = 0.4913, lr_0 = 4.8627e-04
Loss = 1.4058e-02, PNorm = 154.6937, GNorm = 0.2316, lr_0 = 4.8593e-04
Loss = 1.3264e-02, PNorm = 154.7290, GNorm = 0.2967, lr_0 = 4.8560e-04
Loss = 1.3870e-02, PNorm = 154.7611, GNorm = 0.4586, lr_0 = 4.8527e-04
Loss = 1.5291e-02, PNorm = 154.7920, GNorm = 0.8518, lr_0 = 4.8494e-04
Loss = 1.6276e-02, PNorm = 154.8304, GNorm = 0.1492, lr_0 = 4.8460e-04
Loss = 1.4264e-02, PNorm = 154.8663, GNorm = 0.2615, lr_0 = 4.8427e-04
Loss = 1.3674e-02, PNorm = 154.9034, GNorm = 0.3050, lr_0 = 4.8394e-04
Loss = 1.5432e-02, PNorm = 154.9379, GNorm = 0.1316, lr_0 = 4.8361e-04
Loss = 1.4027e-02, PNorm = 154.9714, GNorm = 0.4754, lr_0 = 4.8328e-04
Loss = 1.3759e-02, PNorm = 155.0056, GNorm = 0.2635, lr_0 = 4.8295e-04
Loss = 1.9767e-02, PNorm = 155.0404, GNorm = 0.2096, lr_0 = 4.8262e-04
Loss = 1.5962e-02, PNorm = 155.0757, GNorm = 0.2710, lr_0 = 4.8228e-04
Loss = 1.2344e-02, PNorm = 155.1104, GNorm = 0.2732, lr_0 = 4.8195e-04
Loss = 1.2683e-02, PNorm = 155.1431, GNorm = 0.3849, lr_0 = 4.8162e-04
Loss = 1.4040e-02, PNorm = 155.1753, GNorm = 0.3657, lr_0 = 4.8129e-04
Loss = 1.4294e-02, PNorm = 155.2110, GNorm = 0.1334, lr_0 = 4.8096e-04
Loss = 1.5028e-02, PNorm = 155.2476, GNorm = 0.2563, lr_0 = 4.8064e-04
Loss = 1.4243e-02, PNorm = 155.2840, GNorm = 0.6229, lr_0 = 4.8031e-04
Loss = 1.6763e-02, PNorm = 155.3195, GNorm = 0.4565, lr_0 = 4.7998e-04
Loss = 1.4923e-02, PNorm = 155.3583, GNorm = 0.1754, lr_0 = 4.7965e-04
Loss = 1.2255e-02, PNorm = 155.3956, GNorm = 0.3311, lr_0 = 4.7932e-04
Loss = 1.2783e-02, PNorm = 155.4280, GNorm = 0.2713, lr_0 = 4.7899e-04
Loss = 1.3080e-02, PNorm = 155.4579, GNorm = 0.2259, lr_0 = 4.7866e-04
Loss = 1.3025e-02, PNorm = 155.4911, GNorm = 0.3509, lr_0 = 4.7833e-04
Loss = 1.3800e-02, PNorm = 155.5196, GNorm = 0.4768, lr_0 = 4.7801e-04
Loss = 1.3744e-02, PNorm = 155.5531, GNorm = 0.1401, lr_0 = 4.7768e-04
Loss = 1.5001e-02, PNorm = 155.5895, GNorm = 0.4610, lr_0 = 4.7735e-04
Loss = 1.5198e-02, PNorm = 155.6242, GNorm = 1.1647, lr_0 = 4.7703e-04
Validation mae = 0.282021
Epoch 11
Loss = 1.1497e-02, PNorm = 155.6511, GNorm = 0.6471, lr_0 = 4.7670e-04
Loss = 1.3630e-02, PNorm = 155.6750, GNorm = 0.1289, lr_0 = 4.7637e-04
Loss = 1.2719e-02, PNorm = 155.6971, GNorm = 0.1916, lr_0 = 4.7605e-04
Loss = 1.3542e-02, PNorm = 155.7224, GNorm = 0.4795, lr_0 = 4.7572e-04
Loss = 1.4604e-02, PNorm = 155.7487, GNorm = 0.2132, lr_0 = 4.7539e-04
Loss = 1.3165e-02, PNorm = 155.7781, GNorm = 0.2437, lr_0 = 4.7507e-04
Loss = 1.1917e-02, PNorm = 155.8019, GNorm = 0.2445, lr_0 = 4.7474e-04
Loss = 1.1577e-02, PNorm = 155.8221, GNorm = 0.2284, lr_0 = 4.7442e-04
Loss = 1.1514e-02, PNorm = 155.8455, GNorm = 0.1562, lr_0 = 4.7409e-04
Loss = 1.1343e-02, PNorm = 155.8685, GNorm = 0.3939, lr_0 = 4.7377e-04
Loss = 1.2634e-02, PNorm = 155.8839, GNorm = 0.2233, lr_0 = 4.7344e-04
Loss = 1.3762e-02, PNorm = 155.9014, GNorm = 0.4656, lr_0 = 4.7312e-04
Loss = 1.1884e-02, PNorm = 155.9251, GNorm = 0.5690, lr_0 = 4.7279e-04
Loss = 1.3004e-02, PNorm = 155.9464, GNorm = 0.3389, lr_0 = 4.7247e-04
Loss = 1.3877e-02, PNorm = 155.9718, GNorm = 0.1568, lr_0 = 4.7215e-04
Loss = 1.2660e-02, PNorm = 155.9991, GNorm = 0.1516, lr_0 = 4.7182e-04
Loss = 1.1277e-02, PNorm = 156.0285, GNorm = 0.2160, lr_0 = 4.7150e-04
Loss = 1.2284e-02, PNorm = 156.0545, GNorm = 0.3414, lr_0 = 4.7118e-04
Loss = 1.0833e-02, PNorm = 156.0753, GNorm = 0.1518, lr_0 = 4.7085e-04
Loss = 1.1802e-02, PNorm = 156.0957, GNorm = 0.5782, lr_0 = 4.7053e-04
Loss = 1.2050e-02, PNorm = 156.1151, GNorm = 0.2863, lr_0 = 4.7021e-04
Loss = 1.1664e-02, PNorm = 156.1389, GNorm = 0.1589, lr_0 = 4.6989e-04
Loss = 1.0867e-02, PNorm = 156.1681, GNorm = 0.2247, lr_0 = 4.6957e-04
Loss = 1.0722e-02, PNorm = 156.1962, GNorm = 0.2372, lr_0 = 4.6924e-04
Loss = 1.1476e-02, PNorm = 156.2237, GNorm = 0.1943, lr_0 = 4.6892e-04
Loss = 1.4267e-02, PNorm = 156.2471, GNorm = 0.6317, lr_0 = 4.6860e-04
Loss = 1.1383e-02, PNorm = 156.2760, GNorm = 0.5571, lr_0 = 4.6828e-04
Loss = 1.1043e-02, PNorm = 156.3009, GNorm = 0.2962, lr_0 = 4.6796e-04
Loss = 1.1554e-02, PNorm = 156.3236, GNorm = 0.3437, lr_0 = 4.6764e-04
Loss = 1.2252e-02, PNorm = 156.3427, GNorm = 0.2665, lr_0 = 4.6732e-04
Loss = 1.2943e-02, PNorm = 156.3672, GNorm = 0.4145, lr_0 = 4.6700e-04
Loss = 9.9109e-03, PNorm = 156.3970, GNorm = 0.2213, lr_0 = 4.6668e-04
Loss = 1.0778e-02, PNorm = 156.4244, GNorm = 0.1341, lr_0 = 4.6636e-04
Loss = 1.1467e-02, PNorm = 156.4498, GNorm = 0.1995, lr_0 = 4.6604e-04
Loss = 1.1243e-02, PNorm = 156.4742, GNorm = 0.1289, lr_0 = 4.6572e-04
Loss = 1.3088e-02, PNorm = 156.4951, GNorm = 0.1249, lr_0 = 4.6540e-04
Loss = 1.5033e-02, PNorm = 156.5193, GNorm = 0.7532, lr_0 = 4.6508e-04
Loss = 1.4118e-02, PNorm = 156.5497, GNorm = 0.2216, lr_0 = 4.6476e-04
Loss = 1.3274e-02, PNorm = 156.5739, GNorm = 0.4277, lr_0 = 4.6445e-04
Loss = 1.3006e-02, PNorm = 156.5961, GNorm = 0.4242, lr_0 = 4.6413e-04
Loss = 1.2847e-02, PNorm = 156.6172, GNorm = 0.6574, lr_0 = 4.6381e-04
Loss = 1.4089e-02, PNorm = 156.6464, GNorm = 0.2509, lr_0 = 4.6349e-04
Loss = 1.0587e-02, PNorm = 156.6768, GNorm = 0.2965, lr_0 = 4.6317e-04
Loss = 1.4705e-02, PNorm = 156.7041, GNorm = 0.1625, lr_0 = 4.6286e-04
Loss = 1.1789e-02, PNorm = 156.7326, GNorm = 0.1911, lr_0 = 4.6254e-04
Loss = 1.1795e-02, PNorm = 156.7590, GNorm = 0.6787, lr_0 = 4.6222e-04
Loss = 1.0974e-02, PNorm = 156.7897, GNorm = 0.5094, lr_0 = 4.6191e-04
Loss = 1.1510e-02, PNorm = 156.8160, GNorm = 0.3754, lr_0 = 4.6159e-04
Loss = 1.1116e-02, PNorm = 156.8424, GNorm = 0.4899, lr_0 = 4.6127e-04
Loss = 1.2648e-02, PNorm = 156.8731, GNorm = 0.4587, lr_0 = 4.6096e-04
Loss = 1.5012e-02, PNorm = 156.8964, GNorm = 0.2411, lr_0 = 4.6064e-04
Loss = 1.4204e-02, PNorm = 156.9250, GNorm = 0.3329, lr_0 = 4.6033e-04
Loss = 1.1296e-02, PNorm = 156.9500, GNorm = 0.4520, lr_0 = 4.6001e-04
Loss = 1.5402e-02, PNorm = 156.9746, GNorm = 0.2645, lr_0 = 4.5970e-04
Loss = 1.1964e-02, PNorm = 157.0043, GNorm = 0.2552, lr_0 = 4.5938e-04
Loss = 9.8617e-03, PNorm = 157.0290, GNorm = 0.2141, lr_0 = 4.5907e-04
Loss = 1.0524e-02, PNorm = 157.0543, GNorm = 0.2915, lr_0 = 4.5875e-04
Loss = 1.0106e-02, PNorm = 157.0755, GNorm = 0.4623, lr_0 = 4.5844e-04
Loss = 1.0047e-02, PNorm = 157.0970, GNorm = 0.4461, lr_0 = 4.5812e-04
Loss = 1.1665e-02, PNorm = 157.1190, GNorm = 0.3642, lr_0 = 4.5781e-04
Loss = 9.6292e-03, PNorm = 157.1410, GNorm = 0.3261, lr_0 = 4.5750e-04
Loss = 1.4047e-02, PNorm = 157.1671, GNorm = 0.1997, lr_0 = 4.5718e-04
Loss = 1.0102e-02, PNorm = 157.1933, GNorm = 0.3505, lr_0 = 4.5687e-04
Loss = 1.4631e-02, PNorm = 157.2239, GNorm = 0.3069, lr_0 = 4.5656e-04
Loss = 1.1826e-02, PNorm = 157.2519, GNorm = 0.1283, lr_0 = 4.5624e-04
Loss = 1.1060e-02, PNorm = 157.2783, GNorm = 0.2010, lr_0 = 4.5593e-04
Loss = 1.0294e-02, PNorm = 157.3070, GNorm = 0.3484, lr_0 = 4.5562e-04
Loss = 1.3869e-02, PNorm = 157.3336, GNorm = 0.4832, lr_0 = 4.5531e-04
Loss = 1.2352e-02, PNorm = 157.3643, GNorm = 0.2490, lr_0 = 4.5499e-04
Loss = 1.0592e-02, PNorm = 157.3873, GNorm = 0.3942, lr_0 = 4.5468e-04
Loss = 1.1016e-02, PNorm = 157.4101, GNorm = 0.3299, lr_0 = 4.5437e-04
Loss = 1.3495e-02, PNorm = 157.4381, GNorm = 0.3638, lr_0 = 4.5406e-04
Loss = 1.3406e-02, PNorm = 157.4706, GNorm = 0.2876, lr_0 = 4.5375e-04
Loss = 1.1394e-02, PNorm = 157.5025, GNorm = 0.3831, lr_0 = 4.5344e-04
Loss = 1.2683e-02, PNorm = 157.5267, GNorm = 0.1723, lr_0 = 4.5313e-04
Loss = 1.2308e-02, PNorm = 157.5502, GNorm = 0.2000, lr_0 = 4.5282e-04
Loss = 1.0542e-02, PNorm = 157.5782, GNorm = 0.3339, lr_0 = 4.5251e-04
Loss = 1.0861e-02, PNorm = 157.6008, GNorm = 0.3470, lr_0 = 4.5220e-04
Loss = 1.2204e-02, PNorm = 157.6279, GNorm = 0.1778, lr_0 = 4.5189e-04
Loss = 1.1060e-02, PNorm = 157.6500, GNorm = 0.4009, lr_0 = 4.5158e-04
Loss = 1.1894e-02, PNorm = 157.6777, GNorm = 0.3792, lr_0 = 4.5127e-04
Loss = 1.1556e-02, PNorm = 157.7041, GNorm = 0.2207, lr_0 = 4.5096e-04
Loss = 1.0666e-02, PNorm = 157.7318, GNorm = 0.1268, lr_0 = 4.5065e-04
Loss = 1.2087e-02, PNorm = 157.7587, GNorm = 0.2732, lr_0 = 4.5034e-04
Loss = 1.1086e-02, PNorm = 157.7842, GNorm = 0.2935, lr_0 = 4.5003e-04
Loss = 1.5552e-02, PNorm = 157.8071, GNorm = 0.3065, lr_0 = 4.4972e-04
Loss = 1.1083e-02, PNorm = 157.8333, GNorm = 0.2677, lr_0 = 4.4942e-04
Loss = 1.1881e-02, PNorm = 157.8628, GNorm = 0.4722, lr_0 = 4.4911e-04
Loss = 1.2192e-02, PNorm = 157.8900, GNorm = 0.7447, lr_0 = 4.4880e-04
Loss = 1.2955e-02, PNorm = 157.9137, GNorm = 0.2586, lr_0 = 4.4849e-04
Loss = 1.5964e-02, PNorm = 157.9484, GNorm = 0.4974, lr_0 = 4.4819e-04
Loss = 1.3720e-02, PNorm = 157.9787, GNorm = 0.2880, lr_0 = 4.4788e-04
Loss = 1.1693e-02, PNorm = 158.0095, GNorm = 0.1587, lr_0 = 4.4757e-04
Loss = 1.1727e-02, PNorm = 158.0395, GNorm = 0.6284, lr_0 = 4.4727e-04
Loss = 1.2824e-02, PNorm = 158.0679, GNorm = 0.2745, lr_0 = 4.4696e-04
Loss = 1.3055e-02, PNorm = 158.0970, GNorm = 0.2225, lr_0 = 4.4665e-04
Loss = 1.1830e-02, PNorm = 158.1278, GNorm = 0.1647, lr_0 = 4.4635e-04
Loss = 1.1574e-02, PNorm = 158.1588, GNorm = 0.5372, lr_0 = 4.4604e-04
Loss = 1.2232e-02, PNorm = 158.1904, GNorm = 0.3768, lr_0 = 4.4574e-04
Loss = 1.1561e-02, PNorm = 158.2205, GNorm = 0.2517, lr_0 = 4.4543e-04
Loss = 1.0648e-02, PNorm = 158.2461, GNorm = 0.5179, lr_0 = 4.4513e-04
Loss = 1.1040e-02, PNorm = 158.2732, GNorm = 0.4639, lr_0 = 4.4482e-04
Loss = 1.2312e-02, PNorm = 158.3002, GNorm = 0.2190, lr_0 = 4.4452e-04
Loss = 1.3872e-02, PNorm = 158.3292, GNorm = 0.1272, lr_0 = 4.4421e-04
Loss = 1.2478e-02, PNorm = 158.3625, GNorm = 0.2821, lr_0 = 4.4391e-04
Loss = 1.0457e-02, PNorm = 158.3884, GNorm = 0.5319, lr_0 = 4.4360e-04
Loss = 1.1506e-02, PNorm = 158.4162, GNorm = 0.3084, lr_0 = 4.4330e-04
Loss = 1.0450e-02, PNorm = 158.4416, GNorm = 0.3347, lr_0 = 4.4299e-04
Loss = 1.1156e-02, PNorm = 158.4699, GNorm = 0.1867, lr_0 = 4.4269e-04
Loss = 1.2412e-02, PNorm = 158.4997, GNorm = 0.9166, lr_0 = 4.4239e-04
Loss = 1.4275e-02, PNorm = 158.5252, GNorm = 0.3223, lr_0 = 4.4209e-04
Loss = 1.1684e-02, PNorm = 158.5572, GNorm = 0.3771, lr_0 = 4.4178e-04
Loss = 1.1007e-02, PNorm = 158.5877, GNorm = 0.2724, lr_0 = 4.4148e-04
Loss = 1.1702e-02, PNorm = 158.6105, GNorm = 0.2616, lr_0 = 4.4118e-04
Loss = 1.2353e-02, PNorm = 158.6336, GNorm = 0.7401, lr_0 = 4.4088e-04
Loss = 1.1130e-02, PNorm = 158.6586, GNorm = 0.1981, lr_0 = 4.4057e-04
Loss = 1.4857e-02, PNorm = 158.6874, GNorm = 0.4950, lr_0 = 4.4027e-04
Loss = 1.2576e-02, PNorm = 158.7176, GNorm = 0.1951, lr_0 = 4.3997e-04
Loss = 1.0806e-02, PNorm = 158.7413, GNorm = 0.5865, lr_0 = 4.3967e-04
Loss = 1.2824e-02, PNorm = 158.7650, GNorm = 0.2091, lr_0 = 4.3937e-04
Validation mae = 0.280783
Epoch 12
Loss = 1.1138e-02, PNorm = 158.7871, GNorm = 0.2660, lr_0 = 4.3907e-04
Loss = 9.8756e-03, PNorm = 158.8073, GNorm = 0.3131, lr_0 = 4.3877e-04
Loss = 9.7282e-03, PNorm = 158.8237, GNorm = 0.4062, lr_0 = 4.3846e-04
Loss = 1.1122e-02, PNorm = 158.8431, GNorm = 0.7640, lr_0 = 4.3816e-04
Loss = 1.1725e-02, PNorm = 158.8626, GNorm = 0.1861, lr_0 = 4.3786e-04
Loss = 1.3389e-02, PNorm = 158.8840, GNorm = 0.4532, lr_0 = 4.3756e-04
Loss = 1.2666e-02, PNorm = 158.9020, GNorm = 0.2777, lr_0 = 4.3726e-04
Loss = 1.1808e-02, PNorm = 158.9232, GNorm = 0.2126, lr_0 = 4.3696e-04
Loss = 9.5888e-03, PNorm = 158.9437, GNorm = 0.2797, lr_0 = 4.3667e-04
Loss = 9.5492e-03, PNorm = 158.9653, GNorm = 0.3559, lr_0 = 4.3637e-04
Loss = 1.2059e-02, PNorm = 158.9842, GNorm = 0.3049, lr_0 = 4.3607e-04
Loss = 1.2609e-02, PNorm = 159.0043, GNorm = 0.4779, lr_0 = 4.3577e-04
Loss = 1.0678e-02, PNorm = 159.0245, GNorm = 0.2442, lr_0 = 4.3547e-04
Loss = 8.4653e-03, PNorm = 159.0426, GNorm = 0.5388, lr_0 = 4.3517e-04
Loss = 9.1696e-03, PNorm = 159.0573, GNorm = 0.4173, lr_0 = 4.3487e-04
Loss = 1.2965e-02, PNorm = 159.0766, GNorm = 1.1394, lr_0 = 4.3458e-04
Loss = 9.1060e-03, PNorm = 159.0978, GNorm = 0.2671, lr_0 = 4.3428e-04
Loss = 1.0405e-02, PNorm = 159.1142, GNorm = 0.3987, lr_0 = 4.3398e-04
Loss = 9.4482e-03, PNorm = 159.1346, GNorm = 0.0984, lr_0 = 4.3368e-04
Loss = 1.0247e-02, PNorm = 159.1525, GNorm = 0.1607, lr_0 = 4.3339e-04
Loss = 9.0911e-03, PNorm = 159.1701, GNorm = 0.2484, lr_0 = 4.3309e-04
Loss = 9.7748e-03, PNorm = 159.1905, GNorm = 0.3333, lr_0 = 4.3279e-04
Loss = 1.0863e-02, PNorm = 159.2108, GNorm = 0.2271, lr_0 = 4.3250e-04
Loss = 1.1582e-02, PNorm = 159.2316, GNorm = 0.3201, lr_0 = 4.3220e-04
Loss = 8.8061e-03, PNorm = 159.2547, GNorm = 0.4380, lr_0 = 4.3190e-04
Loss = 1.0446e-02, PNorm = 159.2730, GNorm = 0.4291, lr_0 = 4.3161e-04
Loss = 9.4419e-03, PNorm = 159.2933, GNorm = 0.4371, lr_0 = 4.3131e-04
Loss = 9.5154e-03, PNorm = 159.3151, GNorm = 0.1692, lr_0 = 4.3102e-04
Loss = 9.7910e-03, PNorm = 159.3430, GNorm = 0.5004, lr_0 = 4.3072e-04
Loss = 9.2940e-03, PNorm = 159.3651, GNorm = 0.1705, lr_0 = 4.3043e-04
Loss = 9.6258e-03, PNorm = 159.3891, GNorm = 0.1032, lr_0 = 4.3013e-04
Loss = 1.1177e-02, PNorm = 159.4117, GNorm = 0.1604, lr_0 = 4.2984e-04
Loss = 1.0400e-02, PNorm = 159.4318, GNorm = 0.2198, lr_0 = 4.2954e-04
Loss = 1.0919e-02, PNorm = 159.4523, GNorm = 0.2099, lr_0 = 4.2925e-04
Loss = 9.4461e-03, PNorm = 159.4735, GNorm = 0.3128, lr_0 = 4.2895e-04
Loss = 9.1786e-03, PNorm = 159.4929, GNorm = 0.2944, lr_0 = 4.2866e-04
Loss = 9.4979e-03, PNorm = 159.5137, GNorm = 0.4805, lr_0 = 4.2837e-04
Loss = 9.2775e-03, PNorm = 159.5336, GNorm = 0.2434, lr_0 = 4.2807e-04
Loss = 1.1636e-02, PNorm = 159.5593, GNorm = 0.2929, lr_0 = 4.2778e-04
Loss = 8.9178e-03, PNorm = 159.5844, GNorm = 0.5738, lr_0 = 4.2749e-04
Loss = 8.6447e-03, PNorm = 159.6058, GNorm = 0.2797, lr_0 = 4.2719e-04
Loss = 9.1398e-03, PNorm = 159.6227, GNorm = 0.1175, lr_0 = 4.2690e-04
Loss = 1.0221e-02, PNorm = 159.6417, GNorm = 0.4610, lr_0 = 4.2661e-04
Loss = 1.2866e-02, PNorm = 159.6648, GNorm = 0.1221, lr_0 = 4.2632e-04
Loss = 8.0548e-03, PNorm = 159.6850, GNorm = 0.1189, lr_0 = 4.2602e-04
Loss = 1.1567e-02, PNorm = 159.7057, GNorm = 0.7658, lr_0 = 4.2573e-04
Loss = 1.3729e-02, PNorm = 159.7247, GNorm = 0.3606, lr_0 = 4.2544e-04
Loss = 1.1769e-02, PNorm = 159.7457, GNorm = 0.1224, lr_0 = 4.2515e-04
Loss = 9.9680e-03, PNorm = 159.7696, GNorm = 0.3198, lr_0 = 4.2486e-04
Loss = 9.1794e-03, PNorm = 159.7922, GNorm = 0.3764, lr_0 = 4.2457e-04
Loss = 9.8149e-03, PNorm = 159.8156, GNorm = 0.1114, lr_0 = 4.2428e-04
Loss = 1.0478e-02, PNorm = 159.8390, GNorm = 0.4119, lr_0 = 4.2399e-04
Loss = 8.5715e-03, PNorm = 159.8609, GNorm = 0.2688, lr_0 = 4.2370e-04
Loss = 1.3512e-02, PNorm = 159.8807, GNorm = 1.2781, lr_0 = 4.2340e-04
Loss = 1.2334e-02, PNorm = 159.9058, GNorm = 0.2572, lr_0 = 4.2311e-04
Loss = 7.8356e-03, PNorm = 159.9302, GNorm = 0.1753, lr_0 = 4.2283e-04
Loss = 9.9919e-03, PNorm = 159.9543, GNorm = 0.2903, lr_0 = 4.2254e-04
Loss = 1.0825e-02, PNorm = 159.9766, GNorm = 0.1263, lr_0 = 4.2225e-04
Loss = 8.4288e-03, PNorm = 159.9968, GNorm = 0.2252, lr_0 = 4.2196e-04
Loss = 9.4704e-03, PNorm = 160.0158, GNorm = 0.5281, lr_0 = 4.2167e-04
Loss = 1.0390e-02, PNorm = 160.0400, GNorm = 0.2755, lr_0 = 4.2138e-04
Loss = 9.5767e-03, PNorm = 160.0649, GNorm = 0.4566, lr_0 = 4.2109e-04
Loss = 1.3186e-02, PNorm = 160.0850, GNorm = 0.1530, lr_0 = 4.2080e-04
Loss = 1.0115e-02, PNorm = 160.1057, GNorm = 0.2120, lr_0 = 4.2051e-04
Loss = 1.0276e-02, PNorm = 160.1342, GNorm = 0.1956, lr_0 = 4.2023e-04
Loss = 1.0874e-02, PNorm = 160.1605, GNorm = 0.6000, lr_0 = 4.1994e-04
Loss = 1.0371e-02, PNorm = 160.1868, GNorm = 0.1957, lr_0 = 4.1965e-04
Loss = 1.2362e-02, PNorm = 160.2106, GNorm = 0.2343, lr_0 = 4.1936e-04
Loss = 1.2727e-02, PNorm = 160.2331, GNorm = 0.1233, lr_0 = 4.1907e-04
Loss = 9.9461e-03, PNorm = 160.2567, GNorm = 0.3585, lr_0 = 4.1879e-04
Loss = 8.7079e-03, PNorm = 160.2811, GNorm = 0.2755, lr_0 = 4.1850e-04
Loss = 7.9263e-03, PNorm = 160.3028, GNorm = 0.1797, lr_0 = 4.1821e-04
Loss = 1.0065e-02, PNorm = 160.3251, GNorm = 0.3375, lr_0 = 4.1793e-04
Loss = 8.1039e-03, PNorm = 160.3447, GNorm = 0.1405, lr_0 = 4.1764e-04
Loss = 1.0243e-02, PNorm = 160.3676, GNorm = 0.1297, lr_0 = 4.1736e-04
Loss = 8.8367e-03, PNorm = 160.3904, GNorm = 0.2585, lr_0 = 4.1707e-04
Loss = 9.8489e-03, PNorm = 160.4106, GNorm = 0.2232, lr_0 = 4.1678e-04
Loss = 1.0266e-02, PNorm = 160.4342, GNorm = 0.1813, lr_0 = 4.1650e-04
Loss = 7.2491e-03, PNorm = 160.4581, GNorm = 0.4215, lr_0 = 4.1621e-04
Loss = 8.2986e-03, PNorm = 160.4816, GNorm = 0.1598, lr_0 = 4.1593e-04
Loss = 8.6193e-03, PNorm = 160.5038, GNorm = 0.1992, lr_0 = 4.1564e-04
Loss = 8.9850e-03, PNorm = 160.5279, GNorm = 0.2657, lr_0 = 4.1536e-04
Loss = 1.3165e-02, PNorm = 160.5491, GNorm = 0.1790, lr_0 = 4.1507e-04
Loss = 1.0112e-02, PNorm = 160.5717, GNorm = 0.2821, lr_0 = 4.1479e-04
Loss = 9.5853e-03, PNorm = 160.5922, GNorm = 0.2451, lr_0 = 4.1450e-04
Loss = 1.2131e-02, PNorm = 160.6108, GNorm = 0.2014, lr_0 = 4.1422e-04
Loss = 1.0591e-02, PNorm = 160.6314, GNorm = 0.2809, lr_0 = 4.1394e-04
Loss = 8.5486e-03, PNorm = 160.6547, GNorm = 0.2621, lr_0 = 4.1365e-04
Loss = 9.7728e-03, PNorm = 160.6800, GNorm = 0.2337, lr_0 = 4.1337e-04
Loss = 1.1845e-02, PNorm = 160.7034, GNorm = 0.5428, lr_0 = 4.1309e-04
Loss = 1.2074e-02, PNorm = 160.7252, GNorm = 0.2902, lr_0 = 4.1280e-04
Loss = 1.0563e-02, PNorm = 160.7520, GNorm = 0.2911, lr_0 = 4.1252e-04
Loss = 1.1566e-02, PNorm = 160.7755, GNorm = 0.6321, lr_0 = 4.1224e-04
Loss = 8.6536e-03, PNorm = 160.7985, GNorm = 0.2105, lr_0 = 4.1196e-04
Loss = 9.6862e-03, PNorm = 160.8203, GNorm = 0.3057, lr_0 = 4.1167e-04
Loss = 1.0742e-02, PNorm = 160.8437, GNorm = 0.4292, lr_0 = 4.1139e-04
Loss = 1.2130e-02, PNorm = 160.8707, GNorm = 0.6202, lr_0 = 4.1111e-04
Loss = 8.7931e-03, PNorm = 160.8974, GNorm = 0.1873, lr_0 = 4.1083e-04
Loss = 1.0281e-02, PNorm = 160.9193, GNorm = 0.2168, lr_0 = 4.1055e-04
Loss = 9.6609e-03, PNorm = 160.9416, GNorm = 0.3415, lr_0 = 4.1027e-04
Loss = 1.1217e-02, PNorm = 160.9643, GNorm = 0.3955, lr_0 = 4.0998e-04
Loss = 1.1434e-02, PNorm = 160.9874, GNorm = 0.2748, lr_0 = 4.0970e-04
Loss = 8.8138e-03, PNorm = 161.0084, GNorm = 0.1223, lr_0 = 4.0942e-04
Loss = 9.9006e-03, PNorm = 161.0306, GNorm = 0.0937, lr_0 = 4.0914e-04
Loss = 8.5877e-03, PNorm = 161.0540, GNorm = 0.2096, lr_0 = 4.0886e-04
Loss = 9.8249e-03, PNorm = 161.0723, GNorm = 0.3612, lr_0 = 4.0858e-04
Loss = 9.9089e-03, PNorm = 161.0934, GNorm = 0.3781, lr_0 = 4.0830e-04
Loss = 1.0080e-02, PNorm = 161.1161, GNorm = 0.5796, lr_0 = 4.0802e-04
Loss = 1.0933e-02, PNorm = 161.1395, GNorm = 0.1047, lr_0 = 4.0774e-04
Loss = 9.7419e-03, PNorm = 161.1624, GNorm = 0.4253, lr_0 = 4.0746e-04
Loss = 8.3270e-03, PNorm = 161.1862, GNorm = 0.1479, lr_0 = 4.0718e-04
Loss = 1.0543e-02, PNorm = 161.2105, GNorm = 0.3689, lr_0 = 4.0691e-04
Loss = 1.0154e-02, PNorm = 161.2370, GNorm = 0.3712, lr_0 = 4.0663e-04
Loss = 8.0706e-03, PNorm = 161.2638, GNorm = 0.2388, lr_0 = 4.0635e-04
Loss = 1.0315e-02, PNorm = 161.2881, GNorm = 0.3739, lr_0 = 4.0607e-04
Loss = 1.0283e-02, PNorm = 161.3105, GNorm = 0.4271, lr_0 = 4.0579e-04
Loss = 1.0407e-02, PNorm = 161.3347, GNorm = 0.2672, lr_0 = 4.0551e-04
Loss = 1.0131e-02, PNorm = 161.3601, GNorm = 0.1217, lr_0 = 4.0524e-04
Loss = 9.8512e-03, PNorm = 161.3831, GNorm = 0.3039, lr_0 = 4.0496e-04
Loss = 1.0108e-02, PNorm = 161.4085, GNorm = 0.9530, lr_0 = 4.0468e-04
Validation mae = 0.281349
Epoch 13
Loss = 9.6578e-03, PNorm = 161.4264, GNorm = 0.3890, lr_0 = 4.0440e-04
Loss = 8.7476e-03, PNorm = 161.4438, GNorm = 0.2436, lr_0 = 4.0413e-04
Loss = 1.0375e-02, PNorm = 161.4579, GNorm = 0.2217, lr_0 = 4.0385e-04
Loss = 9.1736e-03, PNorm = 161.4747, GNorm = 0.1495, lr_0 = 4.0357e-04
Loss = 9.3730e-03, PNorm = 161.4919, GNorm = 0.1958, lr_0 = 4.0330e-04
Loss = 8.6882e-03, PNorm = 161.5100, GNorm = 0.8391, lr_0 = 4.0302e-04
Loss = 8.6587e-03, PNorm = 161.5289, GNorm = 0.1587, lr_0 = 4.0274e-04
Loss = 8.9557e-03, PNorm = 161.5455, GNorm = 0.2276, lr_0 = 4.0247e-04
Loss = 1.3266e-02, PNorm = 161.5635, GNorm = 0.2354, lr_0 = 4.0219e-04
Loss = 7.9615e-03, PNorm = 161.5833, GNorm = 0.1701, lr_0 = 4.0192e-04
Loss = 1.1464e-02, PNorm = 161.5995, GNorm = 0.2083, lr_0 = 4.0164e-04
Loss = 8.6324e-03, PNorm = 161.6125, GNorm = 0.1209, lr_0 = 4.0137e-04
Loss = 8.5841e-03, PNorm = 161.6306, GNorm = 0.1673, lr_0 = 4.0109e-04
Loss = 8.9645e-03, PNorm = 161.6472, GNorm = 0.3437, lr_0 = 4.0082e-04
Loss = 7.2288e-03, PNorm = 161.6665, GNorm = 0.1133, lr_0 = 4.0054e-04
Loss = 8.6780e-03, PNorm = 161.6860, GNorm = 0.1446, lr_0 = 4.0027e-04
Loss = 7.8518e-03, PNorm = 161.7036, GNorm = 0.4677, lr_0 = 3.9999e-04
Loss = 8.4202e-03, PNorm = 161.7187, GNorm = 0.2560, lr_0 = 3.9972e-04
Loss = 9.8598e-03, PNorm = 161.7363, GNorm = 0.3977, lr_0 = 3.9945e-04
Loss = 6.7277e-03, PNorm = 161.7533, GNorm = 0.1803, lr_0 = 3.9917e-04
Loss = 8.1173e-03, PNorm = 161.7669, GNorm = 0.3682, lr_0 = 3.9890e-04
Loss = 7.8997e-03, PNorm = 161.7892, GNorm = 0.2882, lr_0 = 3.9863e-04
Loss = 9.4681e-03, PNorm = 161.8063, GNorm = 0.4412, lr_0 = 3.9835e-04
Loss = 7.3558e-03, PNorm = 161.8226, GNorm = 0.1473, lr_0 = 3.9808e-04
Loss = 8.3605e-03, PNorm = 161.8414, GNorm = 0.1366, lr_0 = 3.9781e-04
Loss = 8.3449e-03, PNorm = 161.8626, GNorm = 0.3051, lr_0 = 3.9753e-04
Loss = 7.3443e-03, PNorm = 161.8831, GNorm = 0.5226, lr_0 = 3.9726e-04
Loss = 9.3887e-03, PNorm = 161.9018, GNorm = 0.1857, lr_0 = 3.9699e-04
Loss = 7.2007e-03, PNorm = 161.9220, GNorm = 0.4938, lr_0 = 3.9672e-04
Loss = 1.2401e-02, PNorm = 161.9379, GNorm = 0.2971, lr_0 = 3.9645e-04
Loss = 1.0541e-02, PNorm = 161.9524, GNorm = 0.6118, lr_0 = 3.9617e-04
Loss = 7.7875e-03, PNorm = 161.9677, GNorm = 0.1714, lr_0 = 3.9590e-04
Loss = 8.2717e-03, PNorm = 161.9815, GNorm = 0.4855, lr_0 = 3.9563e-04
Loss = 1.1321e-02, PNorm = 161.9990, GNorm = 0.2018, lr_0 = 3.9536e-04
Loss = 7.4046e-03, PNorm = 162.0165, GNorm = 0.1117, lr_0 = 3.9509e-04
Loss = 9.8936e-03, PNorm = 162.0368, GNorm = 0.2398, lr_0 = 3.9482e-04
Loss = 7.7734e-03, PNorm = 162.0564, GNorm = 0.1126, lr_0 = 3.9455e-04
Loss = 7.8711e-03, PNorm = 162.0764, GNorm = 0.2039, lr_0 = 3.9428e-04
Loss = 9.0972e-03, PNorm = 162.0975, GNorm = 0.4056, lr_0 = 3.9401e-04
Loss = 8.5887e-03, PNorm = 162.1178, GNorm = 0.4450, lr_0 = 3.9374e-04
Loss = 9.4141e-03, PNorm = 162.1334, GNorm = 0.3115, lr_0 = 3.9347e-04
Loss = 8.1852e-03, PNorm = 162.1483, GNorm = 0.4348, lr_0 = 3.9320e-04
Loss = 9.1916e-03, PNorm = 162.1665, GNorm = 0.5728, lr_0 = 3.9293e-04
Loss = 9.5910e-03, PNorm = 162.1860, GNorm = 0.3776, lr_0 = 3.9266e-04
Loss = 7.5654e-03, PNorm = 162.2050, GNorm = 0.4888, lr_0 = 3.9239e-04
Loss = 9.0815e-03, PNorm = 162.2279, GNorm = 0.2385, lr_0 = 3.9212e-04
Loss = 7.4692e-03, PNorm = 162.2452, GNorm = 0.1391, lr_0 = 3.9185e-04
Loss = 8.5044e-03, PNorm = 162.2659, GNorm = 0.3524, lr_0 = 3.9159e-04
Loss = 8.1324e-03, PNorm = 162.2839, GNorm = 0.4929, lr_0 = 3.9132e-04
Loss = 6.2333e-03, PNorm = 162.3004, GNorm = 0.1736, lr_0 = 3.9105e-04
Loss = 8.0536e-03, PNorm = 162.3134, GNorm = 0.3469, lr_0 = 3.9078e-04
Loss = 1.0448e-02, PNorm = 162.3266, GNorm = 0.1383, lr_0 = 3.9051e-04
Loss = 8.4338e-03, PNorm = 162.3440, GNorm = 0.2685, lr_0 = 3.9025e-04
Loss = 8.5268e-03, PNorm = 162.3636, GNorm = 0.3074, lr_0 = 3.8998e-04
Loss = 8.9396e-03, PNorm = 162.3819, GNorm = 0.1096, lr_0 = 3.8971e-04
Loss = 1.0446e-02, PNorm = 162.3996, GNorm = 0.3873, lr_0 = 3.8945e-04
Loss = 1.2914e-02, PNorm = 162.4161, GNorm = 0.1994, lr_0 = 3.8918e-04
Loss = 9.7109e-03, PNorm = 162.4328, GNorm = 0.5929, lr_0 = 3.8891e-04
Loss = 7.5000e-03, PNorm = 162.4518, GNorm = 0.0626, lr_0 = 3.8865e-04
Loss = 7.0731e-03, PNorm = 162.4683, GNorm = 0.1854, lr_0 = 3.8838e-04
Loss = 7.9781e-03, PNorm = 162.4869, GNorm = 0.1149, lr_0 = 3.8811e-04
Loss = 9.6815e-03, PNorm = 162.5065, GNorm = 0.5295, lr_0 = 3.8785e-04
Loss = 8.2205e-03, PNorm = 162.5219, GNorm = 0.2709, lr_0 = 3.8758e-04
Loss = 8.6394e-03, PNorm = 162.5403, GNorm = 0.1770, lr_0 = 3.8732e-04
Loss = 9.3415e-03, PNorm = 162.5551, GNorm = 0.2616, lr_0 = 3.8705e-04
Loss = 8.0226e-03, PNorm = 162.5742, GNorm = 0.0849, lr_0 = 3.8679e-04
Loss = 7.5723e-03, PNorm = 162.5923, GNorm = 0.1626, lr_0 = 3.8652e-04
Loss = 8.9522e-03, PNorm = 162.6134, GNorm = 0.2354, lr_0 = 3.8626e-04
Loss = 8.7294e-03, PNorm = 162.6370, GNorm = 0.3528, lr_0 = 3.8599e-04
Loss = 6.8058e-03, PNorm = 162.6532, GNorm = 0.4306, lr_0 = 3.8573e-04
Loss = 7.8162e-03, PNorm = 162.6694, GNorm = 0.2225, lr_0 = 3.8546e-04
Loss = 8.2190e-03, PNorm = 162.6868, GNorm = 0.7372, lr_0 = 3.8520e-04
Loss = 1.0279e-02, PNorm = 162.7046, GNorm = 0.3413, lr_0 = 3.8493e-04
Loss = 1.1194e-02, PNorm = 162.7205, GNorm = 0.5647, lr_0 = 3.8467e-04
Loss = 8.2238e-03, PNorm = 162.7410, GNorm = 0.2612, lr_0 = 3.8441e-04
Loss = 8.8285e-03, PNorm = 162.7587, GNorm = 0.2992, lr_0 = 3.8414e-04
Loss = 7.8267e-03, PNorm = 162.7743, GNorm = 0.1824, lr_0 = 3.8388e-04
Loss = 8.2181e-03, PNorm = 162.7961, GNorm = 0.2907, lr_0 = 3.8362e-04
Loss = 8.6330e-03, PNorm = 162.8128, GNorm = 0.1127, lr_0 = 3.8336e-04
Loss = 9.1874e-03, PNorm = 162.8306, GNorm = 0.7114, lr_0 = 3.8309e-04
Loss = 7.6685e-03, PNorm = 162.8484, GNorm = 0.1640, lr_0 = 3.8283e-04
Loss = 9.9655e-03, PNorm = 162.8688, GNorm = 0.0967, lr_0 = 3.8257e-04
Loss = 1.0743e-02, PNorm = 162.8827, GNorm = 0.2928, lr_0 = 3.8231e-04
Loss = 7.5775e-03, PNorm = 162.9024, GNorm = 0.3053, lr_0 = 3.8204e-04
Loss = 8.4928e-03, PNorm = 162.9227, GNorm = 0.4030, lr_0 = 3.8178e-04
Loss = 7.8753e-03, PNorm = 162.9426, GNorm = 0.1923, lr_0 = 3.8152e-04
Loss = 8.9803e-03, PNorm = 162.9613, GNorm = 0.3456, lr_0 = 3.8126e-04
Loss = 7.9472e-03, PNorm = 162.9827, GNorm = 0.1814, lr_0 = 3.8100e-04
Loss = 8.1359e-03, PNorm = 163.0050, GNorm = 0.0970, lr_0 = 3.8074e-04
Loss = 1.0414e-02, PNorm = 163.0217, GNorm = 0.4749, lr_0 = 3.8048e-04
Loss = 7.7631e-03, PNorm = 163.0365, GNorm = 0.4205, lr_0 = 3.8022e-04
Loss = 7.8548e-03, PNorm = 163.0483, GNorm = 0.1305, lr_0 = 3.7995e-04
Loss = 8.0970e-03, PNorm = 163.0633, GNorm = 0.1638, lr_0 = 3.7969e-04
Loss = 8.4598e-03, PNorm = 163.0828, GNorm = 0.2331, lr_0 = 3.7943e-04
Loss = 8.2496e-03, PNorm = 163.1057, GNorm = 0.1499, lr_0 = 3.7917e-04
Loss = 9.5685e-03, PNorm = 163.1318, GNorm = 0.1123, lr_0 = 3.7891e-04
Loss = 8.7574e-03, PNorm = 163.1531, GNorm = 0.3865, lr_0 = 3.7866e-04
Loss = 1.3095e-02, PNorm = 163.1715, GNorm = 0.1701, lr_0 = 3.7840e-04
Loss = 7.0698e-03, PNorm = 163.1942, GNorm = 0.2674, lr_0 = 3.7814e-04
Loss = 8.3882e-03, PNorm = 163.2141, GNorm = 0.6174, lr_0 = 3.7788e-04
Loss = 7.7893e-03, PNorm = 163.2349, GNorm = 0.4046, lr_0 = 3.7762e-04
Loss = 9.3286e-03, PNorm = 163.2520, GNorm = 0.1140, lr_0 = 3.7736e-04
Loss = 7.6581e-03, PNorm = 163.2714, GNorm = 0.0885, lr_0 = 3.7710e-04
Loss = 7.6902e-03, PNorm = 163.2896, GNorm = 0.3050, lr_0 = 3.7684e-04
Loss = 9.2626e-03, PNorm = 163.3099, GNorm = 0.5660, lr_0 = 3.7659e-04
Loss = 8.4732e-03, PNorm = 163.3314, GNorm = 0.2690, lr_0 = 3.7633e-04
Loss = 1.2384e-02, PNorm = 163.3523, GNorm = 0.2396, lr_0 = 3.7607e-04
Loss = 8.1953e-03, PNorm = 163.3703, GNorm = 0.5717, lr_0 = 3.7581e-04
Loss = 8.7129e-03, PNorm = 163.3898, GNorm = 0.3017, lr_0 = 3.7555e-04
Loss = 7.3728e-03, PNorm = 163.4092, GNorm = 0.2439, lr_0 = 3.7530e-04
Loss = 8.2503e-03, PNorm = 163.4284, GNorm = 0.0727, lr_0 = 3.7504e-04
Loss = 8.4416e-03, PNorm = 163.4492, GNorm = 0.2476, lr_0 = 3.7478e-04
Loss = 7.7884e-03, PNorm = 163.4695, GNorm = 0.2551, lr_0 = 3.7453e-04
Loss = 7.4337e-03, PNorm = 163.4904, GNorm = 0.3652, lr_0 = 3.7427e-04
Loss = 7.5183e-03, PNorm = 163.5071, GNorm = 0.2075, lr_0 = 3.7401e-04
Loss = 9.4192e-03, PNorm = 163.5237, GNorm = 0.2871, lr_0 = 3.7376e-04
Loss = 7.6824e-03, PNorm = 163.5418, GNorm = 0.1945, lr_0 = 3.7350e-04
Loss = 9.5765e-03, PNorm = 163.5633, GNorm = 0.3918, lr_0 = 3.7325e-04
Loss = 8.9748e-03, PNorm = 163.5869, GNorm = 0.3048, lr_0 = 3.7299e-04
Loss = 9.3295e-03, PNorm = 163.6074, GNorm = 0.1126, lr_0 = 3.7273e-04
Validation mae = 0.280232
Epoch 14
Loss = 7.4354e-03, PNorm = 163.6232, GNorm = 0.1488, lr_0 = 3.7248e-04
Loss = 8.3618e-03, PNorm = 163.6382, GNorm = 0.1209, lr_0 = 3.7222e-04
Loss = 9.2782e-03, PNorm = 163.6524, GNorm = 0.4924, lr_0 = 3.7197e-04
Loss = 7.3165e-03, PNorm = 163.6684, GNorm = 0.3574, lr_0 = 3.7171e-04
Loss = 6.8058e-03, PNorm = 163.6823, GNorm = 0.2594, lr_0 = 3.7146e-04
Loss = 7.6345e-03, PNorm = 163.6948, GNorm = 0.1578, lr_0 = 3.7120e-04
Loss = 6.8644e-03, PNorm = 163.7094, GNorm = 0.2520, lr_0 = 3.7095e-04
Loss = 6.9169e-03, PNorm = 163.7227, GNorm = 0.3010, lr_0 = 3.7070e-04
Loss = 6.8853e-03, PNorm = 163.7320, GNorm = 0.1130, lr_0 = 3.7044e-04
Loss = 7.5665e-03, PNorm = 163.7416, GNorm = 0.1378, lr_0 = 3.7019e-04
Loss = 8.9841e-03, PNorm = 163.7555, GNorm = 0.1068, lr_0 = 3.6993e-04
Loss = 8.3120e-03, PNorm = 163.7697, GNorm = 0.2627, lr_0 = 3.6968e-04
Loss = 6.2541e-03, PNorm = 163.7854, GNorm = 0.2236, lr_0 = 3.6943e-04
Loss = 6.4606e-03, PNorm = 163.7968, GNorm = 0.3929, lr_0 = 3.6917e-04
Loss = 6.2869e-03, PNorm = 163.8076, GNorm = 0.3210, lr_0 = 3.6892e-04
Loss = 8.5678e-03, PNorm = 163.8186, GNorm = 0.2218, lr_0 = 3.6867e-04
Loss = 7.5396e-03, PNorm = 163.8346, GNorm = 0.2448, lr_0 = 3.6842e-04
Loss = 7.0032e-03, PNorm = 163.8485, GNorm = 0.2782, lr_0 = 3.6816e-04
Loss = 6.4039e-03, PNorm = 163.8664, GNorm = 0.2432, lr_0 = 3.6791e-04
Loss = 7.1874e-03, PNorm = 163.8803, GNorm = 0.2773, lr_0 = 3.6766e-04
Loss = 6.9342e-03, PNorm = 163.8958, GNorm = 0.1296, lr_0 = 3.6741e-04
Loss = 9.6656e-03, PNorm = 163.9097, GNorm = 0.1890, lr_0 = 3.6716e-04
Loss = 8.5479e-03, PNorm = 163.9272, GNorm = 0.3692, lr_0 = 3.6690e-04
Loss = 6.3289e-03, PNorm = 163.9422, GNorm = 0.1715, lr_0 = 3.6665e-04
Loss = 7.9377e-03, PNorm = 163.9559, GNorm = 0.2046, lr_0 = 3.6640e-04
Loss = 7.9042e-03, PNorm = 163.9728, GNorm = 0.1263, lr_0 = 3.6615e-04
Loss = 6.1097e-03, PNorm = 163.9867, GNorm = 0.5105, lr_0 = 3.6590e-04
Loss = 7.0559e-03, PNorm = 164.0026, GNorm = 0.0955, lr_0 = 3.6565e-04
Loss = 6.0709e-03, PNorm = 164.0152, GNorm = 0.1015, lr_0 = 3.6540e-04
Loss = 6.5606e-03, PNorm = 164.0296, GNorm = 0.2514, lr_0 = 3.6515e-04
Loss = 6.8522e-03, PNorm = 164.0424, GNorm = 0.1729, lr_0 = 3.6490e-04
Loss = 7.6499e-03, PNorm = 164.0580, GNorm = 0.4220, lr_0 = 3.6465e-04
Loss = 7.6601e-03, PNorm = 164.0719, GNorm = 0.4142, lr_0 = 3.6440e-04
Loss = 7.6406e-03, PNorm = 164.0886, GNorm = 0.2106, lr_0 = 3.6415e-04
Loss = 8.1073e-03, PNorm = 164.1031, GNorm = 0.3917, lr_0 = 3.6390e-04
Loss = 6.6187e-03, PNorm = 164.1176, GNorm = 0.2568, lr_0 = 3.6365e-04
Loss = 9.1321e-03, PNorm = 164.1313, GNorm = 0.3492, lr_0 = 3.6340e-04
Loss = 7.6394e-03, PNorm = 164.1450, GNorm = 0.3290, lr_0 = 3.6315e-04
Loss = 6.3840e-03, PNorm = 164.1595, GNorm = 0.3075, lr_0 = 3.6290e-04
Loss = 9.8881e-03, PNorm = 164.1730, GNorm = 0.1478, lr_0 = 3.6266e-04
Loss = 6.4354e-03, PNorm = 164.1873, GNorm = 0.2446, lr_0 = 3.6241e-04
Loss = 7.2567e-03, PNorm = 164.2017, GNorm = 0.0857, lr_0 = 3.6216e-04
Loss = 5.6479e-03, PNorm = 164.2160, GNorm = 0.1543, lr_0 = 3.6191e-04
Loss = 8.5429e-03, PNorm = 164.2279, GNorm = 0.2268, lr_0 = 3.6166e-04
Loss = 6.2797e-03, PNorm = 164.2437, GNorm = 0.1362, lr_0 = 3.6141e-04
Loss = 7.8957e-03, PNorm = 164.2579, GNorm = 0.2285, lr_0 = 3.6117e-04
Loss = 9.8012e-03, PNorm = 164.2723, GNorm = 0.3758, lr_0 = 3.6092e-04
Loss = 7.6232e-03, PNorm = 164.2837, GNorm = 0.3079, lr_0 = 3.6067e-04
Loss = 6.5234e-03, PNorm = 164.2977, GNorm = 0.4882, lr_0 = 3.6043e-04
Loss = 6.7226e-03, PNorm = 164.3119, GNorm = 0.2478, lr_0 = 3.6018e-04
Loss = 8.1027e-03, PNorm = 164.3307, GNorm = 0.2330, lr_0 = 3.5993e-04
Loss = 6.0569e-03, PNorm = 164.3474, GNorm = 0.2872, lr_0 = 3.5969e-04
Loss = 6.7808e-03, PNorm = 164.3640, GNorm = 0.1518, lr_0 = 3.5944e-04
Loss = 1.2556e-02, PNorm = 164.3775, GNorm = 0.1775, lr_0 = 3.5919e-04
Loss = 7.2587e-03, PNorm = 164.3924, GNorm = 0.1235, lr_0 = 3.5895e-04
Loss = 6.7169e-03, PNorm = 164.4073, GNorm = 0.4155, lr_0 = 3.5870e-04
Loss = 7.5153e-03, PNorm = 164.4234, GNorm = 0.3386, lr_0 = 3.5845e-04
Loss = 9.3952e-03, PNorm = 164.4459, GNorm = 0.4257, lr_0 = 3.5821e-04
Loss = 7.8412e-03, PNorm = 164.4671, GNorm = 0.4496, lr_0 = 3.5796e-04
Loss = 7.6208e-03, PNorm = 164.4823, GNorm = 0.4402, lr_0 = 3.5772e-04
Loss = 8.6650e-03, PNorm = 164.4974, GNorm = 0.2651, lr_0 = 3.5747e-04
Loss = 7.5309e-03, PNorm = 164.5099, GNorm = 0.5102, lr_0 = 3.5723e-04
Loss = 7.1787e-03, PNorm = 164.5248, GNorm = 0.2480, lr_0 = 3.5698e-04
Loss = 6.5667e-03, PNorm = 164.5400, GNorm = 0.2930, lr_0 = 3.5674e-04
Loss = 7.9034e-03, PNorm = 164.5570, GNorm = 0.3888, lr_0 = 3.5650e-04
Loss = 7.3555e-03, PNorm = 164.5722, GNorm = 0.6139, lr_0 = 3.5625e-04
Loss = 6.1469e-03, PNorm = 164.5887, GNorm = 0.3240, lr_0 = 3.5601e-04
Loss = 6.3047e-03, PNorm = 164.6017, GNorm = 0.2013, lr_0 = 3.5576e-04
Loss = 5.6868e-03, PNorm = 164.6158, GNorm = 0.1885, lr_0 = 3.5552e-04
Loss = 6.0190e-03, PNorm = 164.6298, GNorm = 0.3052, lr_0 = 3.5528e-04
Loss = 6.5000e-03, PNorm = 164.6438, GNorm = 0.3883, lr_0 = 3.5503e-04
Loss = 6.4827e-03, PNorm = 164.6587, GNorm = 0.3922, lr_0 = 3.5479e-04
Loss = 7.1740e-03, PNorm = 164.6704, GNorm = 0.3754, lr_0 = 3.5455e-04
Loss = 6.7025e-03, PNorm = 164.6868, GNorm = 0.1242, lr_0 = 3.5430e-04
Loss = 5.9300e-03, PNorm = 164.7013, GNorm = 0.1453, lr_0 = 3.5406e-04
Loss = 1.0339e-02, PNorm = 164.7173, GNorm = 0.0835, lr_0 = 3.5382e-04
Loss = 8.2727e-03, PNorm = 164.7325, GNorm = 0.2480, lr_0 = 3.5358e-04
Loss = 6.4983e-03, PNorm = 164.7518, GNorm = 0.4970, lr_0 = 3.5333e-04
Loss = 6.9228e-03, PNorm = 164.7689, GNorm = 0.4755, lr_0 = 3.5309e-04
Loss = 8.3834e-03, PNorm = 164.7804, GNorm = 0.6519, lr_0 = 3.5285e-04
Loss = 9.3243e-03, PNorm = 164.7990, GNorm = 0.4407, lr_0 = 3.5261e-04
Loss = 6.3983e-03, PNorm = 164.8113, GNorm = 0.1609, lr_0 = 3.5237e-04
Loss = 6.4689e-03, PNorm = 164.8275, GNorm = 0.2271, lr_0 = 3.5212e-04
Loss = 6.1749e-03, PNorm = 164.8457, GNorm = 0.1812, lr_0 = 3.5188e-04
Loss = 7.1377e-03, PNorm = 164.8614, GNorm = 0.1164, lr_0 = 3.5164e-04
Loss = 6.2957e-03, PNorm = 164.8755, GNorm = 0.1412, lr_0 = 3.5140e-04
Loss = 8.5023e-03, PNorm = 164.8888, GNorm = 0.2403, lr_0 = 3.5116e-04
Loss = 9.0473e-03, PNorm = 164.9042, GNorm = 0.3181, lr_0 = 3.5092e-04
Loss = 1.0126e-02, PNorm = 164.9210, GNorm = 0.1159, lr_0 = 3.5068e-04
Loss = 6.6395e-03, PNorm = 164.9382, GNorm = 0.4341, lr_0 = 3.5044e-04
Loss = 1.0787e-02, PNorm = 164.9546, GNorm = 0.3366, lr_0 = 3.5020e-04
Loss = 6.2585e-03, PNorm = 164.9733, GNorm = 0.2698, lr_0 = 3.4996e-04
Loss = 7.3280e-03, PNorm = 164.9911, GNorm = 0.3530, lr_0 = 3.4972e-04
Loss = 7.2942e-03, PNorm = 165.0088, GNorm = 0.1536, lr_0 = 3.4948e-04
Loss = 6.3502e-03, PNorm = 165.0280, GNorm = 0.2204, lr_0 = 3.4924e-04
Loss = 7.2711e-03, PNorm = 165.0415, GNorm = 0.3727, lr_0 = 3.4900e-04
Loss = 6.0172e-03, PNorm = 165.0538, GNorm = 0.2273, lr_0 = 3.4876e-04
Loss = 8.7671e-03, PNorm = 165.0664, GNorm = 0.3351, lr_0 = 3.4852e-04
Loss = 6.5910e-03, PNorm = 165.0857, GNorm = 0.2379, lr_0 = 3.4828e-04
Loss = 9.4122e-03, PNorm = 165.1047, GNorm = 0.1965, lr_0 = 3.4805e-04
Loss = 6.1922e-03, PNorm = 165.1232, GNorm = 0.1224, lr_0 = 3.4781e-04
Loss = 5.8468e-03, PNorm = 165.1387, GNorm = 0.2078, lr_0 = 3.4757e-04
Loss = 6.9990e-03, PNorm = 165.1563, GNorm = 0.3859, lr_0 = 3.4733e-04
Loss = 1.3116e-02, PNorm = 165.1733, GNorm = 0.1496, lr_0 = 3.4709e-04
Loss = 8.0635e-03, PNorm = 165.1878, GNorm = 0.2353, lr_0 = 3.4686e-04
Loss = 8.7454e-03, PNorm = 165.1997, GNorm = 0.1067, lr_0 = 3.4662e-04
Loss = 7.6972e-03, PNorm = 165.2190, GNorm = 0.2592, lr_0 = 3.4638e-04
Loss = 7.2321e-03, PNorm = 165.2350, GNorm = 0.2456, lr_0 = 3.4614e-04
Loss = 7.3435e-03, PNorm = 165.2517, GNorm = 0.2545, lr_0 = 3.4591e-04
Loss = 7.5613e-03, PNorm = 165.2729, GNorm = 0.2485, lr_0 = 3.4567e-04
Loss = 7.3613e-03, PNorm = 165.2905, GNorm = 0.3853, lr_0 = 3.4543e-04
Loss = 7.8926e-03, PNorm = 165.3090, GNorm = 0.1630, lr_0 = 3.4520e-04
Loss = 6.8551e-03, PNorm = 165.3288, GNorm = 0.1987, lr_0 = 3.4496e-04
Loss = 6.1280e-03, PNorm = 165.3462, GNorm = 0.2360, lr_0 = 3.4472e-04
Loss = 7.6187e-03, PNorm = 165.3625, GNorm = 0.1583, lr_0 = 3.4449e-04
Loss = 6.1178e-03, PNorm = 165.3760, GNorm = 0.1268, lr_0 = 3.4425e-04
Loss = 6.1479e-03, PNorm = 165.3896, GNorm = 0.2022, lr_0 = 3.4402e-04
Loss = 7.8086e-03, PNorm = 165.4022, GNorm = 0.1834, lr_0 = 3.4378e-04
Loss = 6.5909e-03, PNorm = 165.4181, GNorm = 0.2733, lr_0 = 3.4354e-04
Loss = 5.5443e-03, PNorm = 165.4361, GNorm = 0.4885, lr_0 = 3.4331e-04
Validation mae = 0.280139
Epoch 15
Loss = 6.3032e-03, PNorm = 165.4489, GNorm = 0.4058, lr_0 = 3.4307e-04
Loss = 5.3071e-03, PNorm = 165.4614, GNorm = 0.2703, lr_0 = 3.4284e-04
Loss = 8.6382e-03, PNorm = 165.4737, GNorm = 0.5687, lr_0 = 3.4260e-04
Loss = 6.6127e-03, PNorm = 165.4841, GNorm = 0.5423, lr_0 = 3.4237e-04
Loss = 5.2600e-03, PNorm = 165.4975, GNorm = 0.1705, lr_0 = 3.4213e-04
Loss = 6.4399e-03, PNorm = 165.5113, GNorm = 0.2208, lr_0 = 3.4190e-04
Loss = 6.4272e-03, PNorm = 165.5235, GNorm = 0.1069, lr_0 = 3.4167e-04
Loss = 5.2090e-03, PNorm = 165.5351, GNorm = 0.2272, lr_0 = 3.4143e-04
Loss = 6.4775e-03, PNorm = 165.5446, GNorm = 0.1159, lr_0 = 3.4120e-04
Loss = 6.9786e-03, PNorm = 165.5555, GNorm = 0.1349, lr_0 = 3.4096e-04
Loss = 5.6467e-03, PNorm = 165.5697, GNorm = 0.1876, lr_0 = 3.4073e-04
Loss = 7.3228e-03, PNorm = 165.5828, GNorm = 0.2037, lr_0 = 3.4050e-04
Loss = 7.1360e-03, PNorm = 165.5926, GNorm = 0.2720, lr_0 = 3.4026e-04
Loss = 6.7882e-03, PNorm = 165.6042, GNorm = 0.1862, lr_0 = 3.4003e-04
Loss = 5.9195e-03, PNorm = 165.6159, GNorm = 0.1133, lr_0 = 3.3980e-04
Loss = 5.2225e-03, PNorm = 165.6267, GNorm = 0.1649, lr_0 = 3.3956e-04
Loss = 6.5017e-03, PNorm = 165.6371, GNorm = 0.2872, lr_0 = 3.3933e-04
Loss = 4.9176e-03, PNorm = 165.6470, GNorm = 0.3697, lr_0 = 3.3910e-04
Loss = 6.0590e-03, PNorm = 165.6582, GNorm = 0.5039, lr_0 = 3.3887e-04
Loss = 6.6641e-03, PNorm = 165.6685, GNorm = 0.1687, lr_0 = 3.3864e-04
Loss = 5.8204e-03, PNorm = 165.6804, GNorm = 0.1180, lr_0 = 3.3840e-04
Loss = 5.4981e-03, PNorm = 165.6930, GNorm = 0.0795, lr_0 = 3.3817e-04
Loss = 8.8048e-03, PNorm = 165.7023, GNorm = 0.1589, lr_0 = 3.3794e-04
Loss = 6.8495e-03, PNorm = 165.7133, GNorm = 0.3287, lr_0 = 3.3771e-04
Loss = 5.9486e-03, PNorm = 165.7245, GNorm = 0.4157, lr_0 = 3.3748e-04
Loss = 5.8465e-03, PNorm = 165.7397, GNorm = 0.4346, lr_0 = 3.3725e-04
Loss = 7.5813e-03, PNorm = 165.7539, GNorm = 0.2031, lr_0 = 3.3701e-04
Loss = 6.9316e-03, PNorm = 165.7650, GNorm = 0.0937, lr_0 = 3.3678e-04
Loss = 5.2763e-03, PNorm = 165.7759, GNorm = 0.2113, lr_0 = 3.3655e-04
Loss = 6.8594e-03, PNorm = 165.7865, GNorm = 0.1652, lr_0 = 3.3632e-04
Loss = 5.8839e-03, PNorm = 165.7982, GNorm = 0.3361, lr_0 = 3.3609e-04
Loss = 5.7500e-03, PNorm = 165.8109, GNorm = 0.0928, lr_0 = 3.3586e-04
Loss = 6.8108e-03, PNorm = 165.8232, GNorm = 0.2387, lr_0 = 3.3563e-04
Loss = 5.9500e-03, PNorm = 165.8383, GNorm = 0.1926, lr_0 = 3.3540e-04
Loss = 7.0281e-03, PNorm = 165.8513, GNorm = 0.6198, lr_0 = 3.3517e-04
Loss = 5.6908e-03, PNorm = 165.8639, GNorm = 0.2604, lr_0 = 3.3494e-04
Loss = 5.4394e-03, PNorm = 165.8765, GNorm = 0.3337, lr_0 = 3.3471e-04
Loss = 4.7884e-03, PNorm = 165.8918, GNorm = 0.1900, lr_0 = 3.3448e-04
Loss = 6.4698e-03, PNorm = 165.9039, GNorm = 0.1554, lr_0 = 3.3425e-04
Loss = 6.2067e-03, PNorm = 165.9165, GNorm = 0.3035, lr_0 = 3.3403e-04
Loss = 5.4673e-03, PNorm = 165.9269, GNorm = 0.4904, lr_0 = 3.3380e-04
Loss = 6.6503e-03, PNorm = 165.9392, GNorm = 0.1012, lr_0 = 3.3357e-04
Loss = 7.2399e-03, PNorm = 165.9517, GNorm = 0.2723, lr_0 = 3.3334e-04
Loss = 6.0870e-03, PNorm = 165.9594, GNorm = 0.2335, lr_0 = 3.3311e-04
Loss = 6.0115e-03, PNorm = 165.9718, GNorm = 0.4316, lr_0 = 3.3288e-04
Loss = 4.9273e-03, PNorm = 165.9863, GNorm = 0.3067, lr_0 = 3.3265e-04
Loss = 6.0600e-03, PNorm = 166.0035, GNorm = 0.0695, lr_0 = 3.3243e-04
Loss = 7.8564e-03, PNorm = 166.0160, GNorm = 0.2109, lr_0 = 3.3220e-04
Loss = 6.7667e-03, PNorm = 166.0264, GNorm = 0.1911, lr_0 = 3.3197e-04
Loss = 5.1712e-03, PNorm = 166.0416, GNorm = 0.2798, lr_0 = 3.3174e-04
Loss = 8.6841e-03, PNorm = 166.0559, GNorm = 0.2183, lr_0 = 3.3152e-04
Loss = 7.3161e-03, PNorm = 166.0694, GNorm = 0.3193, lr_0 = 3.3129e-04
Loss = 8.1713e-03, PNorm = 166.0839, GNorm = 0.2428, lr_0 = 3.3106e-04
Loss = 6.2585e-03, PNorm = 166.0953, GNorm = 0.2671, lr_0 = 3.3084e-04
Loss = 6.5950e-03, PNorm = 166.1068, GNorm = 0.1544, lr_0 = 3.3061e-04
Loss = 6.5128e-03, PNorm = 166.1194, GNorm = 0.0737, lr_0 = 3.3038e-04
Loss = 6.9947e-03, PNorm = 166.1319, GNorm = 0.1573, lr_0 = 3.3016e-04
Loss = 4.7237e-03, PNorm = 166.1444, GNorm = 0.2419, lr_0 = 3.2993e-04
Loss = 7.5316e-03, PNorm = 166.1590, GNorm = 0.5556, lr_0 = 3.2970e-04
Loss = 7.4532e-03, PNorm = 166.1718, GNorm = 0.1155, lr_0 = 3.2948e-04
Loss = 5.3313e-03, PNorm = 166.1843, GNorm = 0.4606, lr_0 = 3.2925e-04
Loss = 7.6096e-03, PNorm = 166.1936, GNorm = 0.3532, lr_0 = 3.2903e-04
Loss = 5.9937e-03, PNorm = 166.2066, GNorm = 0.1537, lr_0 = 3.2880e-04
Loss = 5.0381e-03, PNorm = 166.2209, GNorm = 0.3723, lr_0 = 3.2858e-04
Loss = 7.3358e-03, PNorm = 166.2345, GNorm = 0.1697, lr_0 = 3.2835e-04
Loss = 6.9960e-03, PNorm = 166.2490, GNorm = 0.1462, lr_0 = 3.2813e-04
Loss = 5.4683e-03, PNorm = 166.2650, GNorm = 0.1303, lr_0 = 3.2790e-04
Loss = 5.4174e-03, PNorm = 166.2782, GNorm = 0.1422, lr_0 = 3.2768e-04
Loss = 5.9896e-03, PNorm = 166.2914, GNorm = 0.2278, lr_0 = 3.2745e-04
Loss = 5.8140e-03, PNorm = 166.3048, GNorm = 0.0660, lr_0 = 3.2723e-04
Loss = 6.4498e-03, PNorm = 166.3210, GNorm = 0.2259, lr_0 = 3.2700e-04
Loss = 4.9023e-03, PNorm = 166.3319, GNorm = 0.2712, lr_0 = 3.2678e-04
Loss = 4.6538e-03, PNorm = 166.3438, GNorm = 0.2385, lr_0 = 3.2656e-04
Loss = 7.0107e-03, PNorm = 166.3572, GNorm = 0.5794, lr_0 = 3.2633e-04
Loss = 5.2534e-03, PNorm = 166.3742, GNorm = 0.3210, lr_0 = 3.2611e-04
Loss = 6.5211e-03, PNorm = 166.3875, GNorm = 0.4299, lr_0 = 3.2589e-04
Loss = 5.2003e-03, PNorm = 166.4013, GNorm = 0.1949, lr_0 = 3.2566e-04
Loss = 5.8472e-03, PNorm = 166.4135, GNorm = 0.3533, lr_0 = 3.2544e-04
Loss = 6.4433e-03, PNorm = 166.4260, GNorm = 0.1263, lr_0 = 3.2522e-04
Loss = 8.0746e-03, PNorm = 166.4352, GNorm = 0.5163, lr_0 = 3.2499e-04
Loss = 6.6371e-03, PNorm = 166.4481, GNorm = 0.1922, lr_0 = 3.2477e-04
Loss = 5.7613e-03, PNorm = 166.4609, GNorm = 0.1393, lr_0 = 3.2455e-04
Loss = 5.8801e-03, PNorm = 166.4729, GNorm = 0.2199, lr_0 = 3.2433e-04
Loss = 5.6830e-03, PNorm = 166.4867, GNorm = 0.1523, lr_0 = 3.2410e-04
Loss = 5.8423e-03, PNorm = 166.4980, GNorm = 0.4101, lr_0 = 3.2388e-04
Loss = 5.2284e-03, PNorm = 166.5090, GNorm = 0.1408, lr_0 = 3.2366e-04
Loss = 5.8606e-03, PNorm = 166.5206, GNorm = 0.1795, lr_0 = 3.2344e-04
Loss = 6.7637e-03, PNorm = 166.5343, GNorm = 0.2587, lr_0 = 3.2322e-04
Loss = 5.5355e-03, PNorm = 166.5495, GNorm = 0.2232, lr_0 = 3.2300e-04
Loss = 5.9141e-03, PNorm = 166.5636, GNorm = 0.1965, lr_0 = 3.2277e-04
Loss = 5.6303e-03, PNorm = 166.5756, GNorm = 0.1863, lr_0 = 3.2255e-04
Loss = 5.9311e-03, PNorm = 166.5885, GNorm = 0.3405, lr_0 = 3.2233e-04
Loss = 5.2142e-03, PNorm = 166.6056, GNorm = 0.0726, lr_0 = 3.2211e-04
Loss = 6.6673e-03, PNorm = 166.6207, GNorm = 0.1613, lr_0 = 3.2189e-04
Loss = 5.9227e-03, PNorm = 166.6334, GNorm = 0.1780, lr_0 = 3.2167e-04
Loss = 8.8268e-03, PNorm = 166.6429, GNorm = 0.2984, lr_0 = 3.2145e-04
Loss = 5.6220e-03, PNorm = 166.6572, GNorm = 0.1340, lr_0 = 3.2123e-04
Loss = 5.6382e-03, PNorm = 166.6702, GNorm = 0.0918, lr_0 = 3.2101e-04
Loss = 5.6444e-03, PNorm = 166.6857, GNorm = 0.1369, lr_0 = 3.2079e-04
Loss = 5.1537e-03, PNorm = 166.6989, GNorm = 0.1637, lr_0 = 3.2057e-04
Loss = 5.0619e-03, PNorm = 166.7125, GNorm = 0.2029, lr_0 = 3.2035e-04
Loss = 6.3416e-03, PNorm = 166.7229, GNorm = 0.1807, lr_0 = 3.2013e-04
Loss = 6.0169e-03, PNorm = 166.7386, GNorm = 0.2910, lr_0 = 3.1991e-04
Loss = 6.6034e-03, PNorm = 166.7516, GNorm = 0.1994, lr_0 = 3.1969e-04
Loss = 6.1900e-03, PNorm = 166.7681, GNorm = 0.2201, lr_0 = 3.1947e-04
Loss = 4.7842e-03, PNorm = 166.7815, GNorm = 0.1585, lr_0 = 3.1925e-04
Loss = 8.2347e-03, PNorm = 166.7944, GNorm = 0.3523, lr_0 = 3.1904e-04
Loss = 9.1383e-03, PNorm = 166.8101, GNorm = 0.2340, lr_0 = 3.1882e-04
Loss = 6.1436e-03, PNorm = 166.8268, GNorm = 0.1014, lr_0 = 3.1860e-04
Loss = 6.3259e-03, PNorm = 166.8402, GNorm = 0.1650, lr_0 = 3.1838e-04
Loss = 5.2911e-03, PNorm = 166.8499, GNorm = 0.1423, lr_0 = 3.1816e-04
Loss = 6.8060e-03, PNorm = 166.8619, GNorm = 0.1749, lr_0 = 3.1794e-04
Loss = 6.4641e-03, PNorm = 166.8745, GNorm = 0.2832, lr_0 = 3.1773e-04
Loss = 8.2549e-03, PNorm = 166.8913, GNorm = 0.2669, lr_0 = 3.1751e-04
Loss = 5.4384e-03, PNorm = 166.9066, GNorm = 0.1450, lr_0 = 3.1729e-04
Loss = 4.6801e-03, PNorm = 166.9205, GNorm = 0.1671, lr_0 = 3.1707e-04
Loss = 7.5599e-03, PNorm = 166.9332, GNorm = 0.1575, lr_0 = 3.1686e-04
Loss = 5.5574e-03, PNorm = 166.9469, GNorm = 0.3830, lr_0 = 3.1664e-04
Loss = 6.5227e-03, PNorm = 166.9600, GNorm = 0.3556, lr_0 = 3.1642e-04
Loss = 6.7045e-03, PNorm = 166.9702, GNorm = 0.1383, lr_0 = 3.1621e-04
Validation mae = 0.280070
Epoch 16
Loss = 6.2726e-03, PNorm = 166.9817, GNorm = 0.3820, lr_0 = 3.1599e-04
Loss = 7.1821e-03, PNorm = 166.9950, GNorm = 0.2571, lr_0 = 3.1577e-04
Loss = 4.8525e-03, PNorm = 167.0084, GNorm = 0.2710, lr_0 = 3.1556e-04
Loss = 5.8484e-03, PNorm = 167.0153, GNorm = 0.3911, lr_0 = 3.1534e-04
Loss = 5.2514e-03, PNorm = 167.0235, GNorm = 0.2303, lr_0 = 3.1512e-04
Loss = 6.0025e-03, PNorm = 167.0333, GNorm = 0.1504, lr_0 = 3.1491e-04
Loss = 5.3394e-03, PNorm = 167.0414, GNorm = 0.1176, lr_0 = 3.1469e-04
Loss = 5.2003e-03, PNorm = 167.0519, GNorm = 0.2420, lr_0 = 3.1448e-04
Loss = 5.2017e-03, PNorm = 167.0629, GNorm = 0.1738, lr_0 = 3.1426e-04
Loss = 5.6913e-03, PNorm = 167.0731, GNorm = 0.0838, lr_0 = 3.1405e-04
Loss = 4.6322e-03, PNorm = 167.0829, GNorm = 0.2312, lr_0 = 3.1383e-04
Loss = 3.9219e-03, PNorm = 167.0925, GNorm = 0.2738, lr_0 = 3.1362e-04
Loss = 6.0130e-03, PNorm = 167.1001, GNorm = 0.2160, lr_0 = 3.1340e-04
Loss = 4.4727e-03, PNorm = 167.1111, GNorm = 0.1843, lr_0 = 3.1319e-04
Loss = 5.4687e-03, PNorm = 167.1171, GNorm = 0.3429, lr_0 = 3.1297e-04
Loss = 6.3146e-03, PNorm = 167.1280, GNorm = 0.1773, lr_0 = 3.1276e-04
Loss = 7.3420e-03, PNorm = 167.1375, GNorm = 0.1390, lr_0 = 3.1254e-04
Loss = 4.2554e-03, PNorm = 167.1491, GNorm = 0.1341, lr_0 = 3.1233e-04
Loss = 5.3351e-03, PNorm = 167.1580, GNorm = 0.2450, lr_0 = 3.1212e-04
Loss = 4.9319e-03, PNorm = 167.1654, GNorm = 0.2200, lr_0 = 3.1190e-04
Loss = 5.4651e-03, PNorm = 167.1713, GNorm = 0.1259, lr_0 = 3.1169e-04
Loss = 6.1199e-03, PNorm = 167.1840, GNorm = 0.1387, lr_0 = 3.1147e-04
Loss = 6.3053e-03, PNorm = 167.1957, GNorm = 0.3810, lr_0 = 3.1126e-04
Loss = 5.7957e-03, PNorm = 167.2091, GNorm = 0.1651, lr_0 = 3.1105e-04
Loss = 5.4385e-03, PNorm = 167.2242, GNorm = 0.3086, lr_0 = 3.1083e-04
Loss = 5.2614e-03, PNorm = 167.2392, GNorm = 0.2426, lr_0 = 3.1062e-04
Loss = 4.4341e-03, PNorm = 167.2513, GNorm = 0.1971, lr_0 = 3.1041e-04
Loss = 4.5677e-03, PNorm = 167.2610, GNorm = 0.1115, lr_0 = 3.1020e-04
Loss = 5.1748e-03, PNorm = 167.2679, GNorm = 0.1850, lr_0 = 3.0998e-04
Loss = 4.6519e-03, PNorm = 167.2773, GNorm = 0.2049, lr_0 = 3.0977e-04
Loss = 4.5285e-03, PNorm = 167.2872, GNorm = 0.1604, lr_0 = 3.0956e-04
Loss = 6.2414e-03, PNorm = 167.2987, GNorm = 0.1653, lr_0 = 3.0935e-04
Loss = 4.2666e-03, PNorm = 167.3071, GNorm = 0.0797, lr_0 = 3.0914e-04
Loss = 4.6570e-03, PNorm = 167.3153, GNorm = 0.4064, lr_0 = 3.0892e-04
Loss = 5.5739e-03, PNorm = 167.3259, GNorm = 0.1285, lr_0 = 3.0871e-04
Loss = 4.4993e-03, PNorm = 167.3368, GNorm = 0.1965, lr_0 = 3.0850e-04
Loss = 6.4331e-03, PNorm = 167.3465, GNorm = 0.1027, lr_0 = 3.0829e-04
Loss = 6.3667e-03, PNorm = 167.3563, GNorm = 0.2947, lr_0 = 3.0808e-04
Loss = 4.0395e-03, PNorm = 167.3652, GNorm = 0.2277, lr_0 = 3.0787e-04
Loss = 4.3557e-03, PNorm = 167.3768, GNorm = 0.1650, lr_0 = 3.0766e-04
Loss = 5.7408e-03, PNorm = 167.3855, GNorm = 0.2343, lr_0 = 3.0745e-04
Loss = 3.7484e-03, PNorm = 167.3956, GNorm = 0.2284, lr_0 = 3.0723e-04
Loss = 5.7535e-03, PNorm = 167.4033, GNorm = 0.3215, lr_0 = 3.0702e-04
Loss = 5.4551e-03, PNorm = 167.4103, GNorm = 0.0667, lr_0 = 3.0681e-04
Loss = 4.2105e-03, PNorm = 167.4196, GNorm = 0.1659, lr_0 = 3.0660e-04
Loss = 4.6608e-03, PNorm = 167.4268, GNorm = 0.4113, lr_0 = 3.0639e-04
Loss = 4.2287e-03, PNorm = 167.4379, GNorm = 0.0658, lr_0 = 3.0618e-04
Loss = 7.0900e-03, PNorm = 167.4517, GNorm = 0.1919, lr_0 = 3.0597e-04
Loss = 4.7351e-03, PNorm = 167.4629, GNorm = 0.1544, lr_0 = 3.0576e-04
Loss = 5.0193e-03, PNorm = 167.4718, GNorm = 0.1438, lr_0 = 3.0555e-04
Loss = 4.4326e-03, PNorm = 167.4805, GNorm = 0.1302, lr_0 = 3.0535e-04
Loss = 4.8015e-03, PNorm = 167.4941, GNorm = 0.2626, lr_0 = 3.0514e-04
Loss = 4.5014e-03, PNorm = 167.5061, GNorm = 0.1412, lr_0 = 3.0493e-04
Loss = 5.5012e-03, PNorm = 167.5157, GNorm = 0.2917, lr_0 = 3.0472e-04
Loss = 4.9909e-03, PNorm = 167.5239, GNorm = 0.1595, lr_0 = 3.0451e-04
Loss = 4.3968e-03, PNorm = 167.5302, GNorm = 0.2499, lr_0 = 3.0430e-04
Loss = 6.9166e-03, PNorm = 167.5394, GNorm = 0.3135, lr_0 = 3.0409e-04
Loss = 5.1386e-03, PNorm = 167.5484, GNorm = 0.2566, lr_0 = 3.0388e-04
Loss = 4.2148e-03, PNorm = 167.5583, GNorm = 0.1677, lr_0 = 3.0368e-04
Loss = 4.2377e-03, PNorm = 167.5686, GNorm = 0.2083, lr_0 = 3.0347e-04
Loss = 5.2204e-03, PNorm = 167.5772, GNorm = 0.3396, lr_0 = 3.0326e-04
Loss = 3.9719e-03, PNorm = 167.5897, GNorm = 0.1904, lr_0 = 3.0305e-04
Loss = 7.2824e-03, PNorm = 167.5993, GNorm = 0.1165, lr_0 = 3.0284e-04
Loss = 4.6655e-03, PNorm = 167.6111, GNorm = 0.1841, lr_0 = 3.0264e-04
Loss = 4.3395e-03, PNorm = 167.6239, GNorm = 0.5272, lr_0 = 3.0243e-04
Loss = 4.3413e-03, PNorm = 167.6323, GNorm = 0.3812, lr_0 = 3.0222e-04
Loss = 6.2145e-03, PNorm = 167.6402, GNorm = 0.2589, lr_0 = 3.0202e-04
Loss = 3.9854e-03, PNorm = 167.6484, GNorm = 0.1322, lr_0 = 3.0181e-04
Loss = 5.1588e-03, PNorm = 167.6585, GNorm = 0.5612, lr_0 = 3.0160e-04
Loss = 5.5595e-03, PNorm = 167.6686, GNorm = 0.1196, lr_0 = 3.0140e-04
Loss = 4.9038e-03, PNorm = 167.6783, GNorm = 0.2222, lr_0 = 3.0119e-04
Loss = 4.9720e-03, PNorm = 167.6882, GNorm = 0.3811, lr_0 = 3.0098e-04
Loss = 6.2745e-03, PNorm = 167.6971, GNorm = 0.3140, lr_0 = 3.0078e-04
Loss = 4.2886e-03, PNorm = 167.7058, GNorm = 0.1344, lr_0 = 3.0057e-04
Loss = 4.4922e-03, PNorm = 167.7172, GNorm = 0.2603, lr_0 = 3.0036e-04
Loss = 9.8841e-03, PNorm = 167.7297, GNorm = 0.1920, lr_0 = 3.0016e-04
Loss = 7.3769e-03, PNorm = 167.7389, GNorm = 0.2230, lr_0 = 2.9995e-04
Loss = 4.1938e-03, PNorm = 167.7518, GNorm = 0.3060, lr_0 = 2.9975e-04
Loss = 6.8827e-03, PNorm = 167.7644, GNorm = 0.1369, lr_0 = 2.9954e-04
Loss = 7.6879e-03, PNorm = 167.7744, GNorm = 0.1952, lr_0 = 2.9934e-04
Loss = 5.6355e-03, PNorm = 167.7843, GNorm = 0.0748, lr_0 = 2.9913e-04
Loss = 4.7299e-03, PNorm = 167.7955, GNorm = 0.3287, lr_0 = 2.9893e-04
Loss = 4.8676e-03, PNorm = 167.8095, GNorm = 0.2568, lr_0 = 2.9872e-04
Loss = 5.8266e-03, PNorm = 167.8224, GNorm = 0.2394, lr_0 = 2.9852e-04
Loss = 5.0028e-03, PNorm = 167.8305, GNorm = 0.2508, lr_0 = 2.9831e-04
Loss = 5.5372e-03, PNorm = 167.8391, GNorm = 0.2371, lr_0 = 2.9811e-04
Loss = 4.5419e-03, PNorm = 167.8503, GNorm = 0.1510, lr_0 = 2.9790e-04
Loss = 5.1625e-03, PNorm = 167.8612, GNorm = 0.1102, lr_0 = 2.9770e-04
Loss = 5.0619e-03, PNorm = 167.8727, GNorm = 0.4026, lr_0 = 2.9750e-04
Loss = 5.1135e-03, PNorm = 167.8834, GNorm = 0.3057, lr_0 = 2.9729e-04
Loss = 4.2392e-03, PNorm = 167.8959, GNorm = 0.1871, lr_0 = 2.9709e-04
Loss = 5.9249e-03, PNorm = 167.9048, GNorm = 0.0651, lr_0 = 2.9689e-04
Loss = 4.3954e-03, PNorm = 167.9162, GNorm = 0.1799, lr_0 = 2.9668e-04
Loss = 6.8179e-03, PNorm = 167.9246, GNorm = 0.1800, lr_0 = 2.9648e-04
Loss = 5.7126e-03, PNorm = 167.9362, GNorm = 0.2362, lr_0 = 2.9628e-04
Loss = 6.5267e-03, PNorm = 167.9447, GNorm = 0.1495, lr_0 = 2.9607e-04
Loss = 6.5084e-03, PNorm = 167.9556, GNorm = 0.3203, lr_0 = 2.9587e-04
Loss = 1.2062e-02, PNorm = 167.9625, GNorm = 0.1817, lr_0 = 2.9567e-04
Loss = 6.9313e-03, PNorm = 167.9736, GNorm = 0.1874, lr_0 = 2.9546e-04
Loss = 4.9272e-03, PNorm = 167.9868, GNorm = 0.1886, lr_0 = 2.9526e-04
Loss = 4.1927e-03, PNorm = 168.0010, GNorm = 0.2875, lr_0 = 2.9506e-04
Loss = 4.5467e-03, PNorm = 168.0131, GNorm = 0.1190, lr_0 = 2.9486e-04
Loss = 5.0202e-03, PNorm = 168.0237, GNorm = 0.2680, lr_0 = 2.9466e-04
Loss = 5.0062e-03, PNorm = 168.0382, GNorm = 0.0682, lr_0 = 2.9445e-04
Loss = 4.4473e-03, PNorm = 168.0498, GNorm = 0.1832, lr_0 = 2.9425e-04
Loss = 3.6380e-03, PNorm = 168.0611, GNorm = 0.3343, lr_0 = 2.9405e-04
Loss = 4.8661e-03, PNorm = 168.0715, GNorm = 0.1401, lr_0 = 2.9385e-04
Loss = 5.1428e-03, PNorm = 168.0839, GNorm = 0.0723, lr_0 = 2.9365e-04
Loss = 5.4687e-03, PNorm = 168.0962, GNorm = 0.1877, lr_0 = 2.9345e-04
Loss = 4.2323e-03, PNorm = 168.1083, GNorm = 0.2442, lr_0 = 2.9325e-04
Loss = 5.4124e-03, PNorm = 168.1165, GNorm = 0.0809, lr_0 = 2.9305e-04
Loss = 4.7445e-03, PNorm = 168.1268, GNorm = 0.1175, lr_0 = 2.9284e-04
Loss = 4.9433e-03, PNorm = 168.1379, GNorm = 0.3166, lr_0 = 2.9264e-04
Loss = 5.3872e-03, PNorm = 168.1498, GNorm = 0.1276, lr_0 = 2.9244e-04
Loss = 6.0300e-03, PNorm = 168.1645, GNorm = 0.1608, lr_0 = 2.9224e-04
Loss = 6.4828e-03, PNorm = 168.1778, GNorm = 0.3563, lr_0 = 2.9204e-04
Loss = 6.6212e-03, PNorm = 168.1904, GNorm = 0.3087, lr_0 = 2.9184e-04
Loss = 6.4315e-03, PNorm = 168.2043, GNorm = 0.1148, lr_0 = 2.9164e-04
Loss = 3.7321e-03, PNorm = 168.2164, GNorm = 0.1706, lr_0 = 2.9144e-04
Loss = 5.6064e-03, PNorm = 168.2257, GNorm = 0.2903, lr_0 = 2.9124e-04
Validation mae = 0.279538
Epoch 17
Loss = 4.7645e-03, PNorm = 168.2341, GNorm = 0.1646, lr_0 = 2.9104e-04
Loss = 4.5429e-03, PNorm = 168.2416, GNorm = 0.1333, lr_0 = 2.9084e-04
Loss = 4.0154e-03, PNorm = 168.2513, GNorm = 0.1696, lr_0 = 2.9065e-04
Loss = 4.8164e-03, PNorm = 168.2610, GNorm = 0.3850, lr_0 = 2.9045e-04
Loss = 5.2825e-03, PNorm = 168.2695, GNorm = 0.6682, lr_0 = 2.9025e-04
Loss = 4.1400e-03, PNorm = 168.2739, GNorm = 0.2707, lr_0 = 2.9005e-04
Loss = 4.7479e-03, PNorm = 168.2796, GNorm = 0.2073, lr_0 = 2.8985e-04
Loss = 5.0076e-03, PNorm = 168.2887, GNorm = 0.4247, lr_0 = 2.8965e-04
Loss = 3.7911e-03, PNorm = 168.2973, GNorm = 0.1934, lr_0 = 2.8945e-04
Loss = 3.8566e-03, PNorm = 168.3080, GNorm = 0.0671, lr_0 = 2.8925e-04
Loss = 4.4652e-03, PNorm = 168.3181, GNorm = 0.1842, lr_0 = 2.8906e-04
Loss = 4.1670e-03, PNorm = 168.3246, GNorm = 0.1622, lr_0 = 2.8886e-04
Loss = 5.5255e-03, PNorm = 168.3307, GNorm = 0.0746, lr_0 = 2.8866e-04
Loss = 4.0418e-03, PNorm = 168.3379, GNorm = 0.2538, lr_0 = 2.8846e-04
Loss = 3.5602e-03, PNorm = 168.3468, GNorm = 0.1662, lr_0 = 2.8826e-04
Loss = 4.3316e-03, PNorm = 168.3531, GNorm = 0.2166, lr_0 = 2.8807e-04
Loss = 3.8557e-03, PNorm = 168.3616, GNorm = 0.0801, lr_0 = 2.8787e-04
Loss = 4.0165e-03, PNorm = 168.3724, GNorm = 0.1554, lr_0 = 2.8767e-04
Loss = 3.8010e-03, PNorm = 168.3817, GNorm = 0.2345, lr_0 = 2.8748e-04
Loss = 4.6759e-03, PNorm = 168.3905, GNorm = 0.1584, lr_0 = 2.8728e-04
Loss = 4.2527e-03, PNorm = 168.3999, GNorm = 0.3053, lr_0 = 2.8708e-04
Loss = 4.9523e-03, PNorm = 168.4059, GNorm = 0.2593, lr_0 = 2.8689e-04
Loss = 5.2106e-03, PNorm = 168.4142, GNorm = 0.2079, lr_0 = 2.8669e-04
Loss = 4.7487e-03, PNorm = 168.4247, GNorm = 0.1766, lr_0 = 2.8649e-04
Loss = 5.7697e-03, PNorm = 168.4327, GNorm = 0.1436, lr_0 = 2.8630e-04
Loss = 3.7863e-03, PNorm = 168.4392, GNorm = 0.1410, lr_0 = 2.8610e-04
Loss = 3.9511e-03, PNorm = 168.4472, GNorm = 0.1807, lr_0 = 2.8590e-04
Loss = 4.6748e-03, PNorm = 168.4524, GNorm = 0.2550, lr_0 = 2.8571e-04
Loss = 4.3719e-03, PNorm = 168.4583, GNorm = 0.2245, lr_0 = 2.8551e-04
Loss = 5.4983e-03, PNorm = 168.4669, GNorm = 0.1718, lr_0 = 2.8532e-04
Loss = 6.6830e-03, PNorm = 168.4770, GNorm = 0.1767, lr_0 = 2.8512e-04
Loss = 4.0017e-03, PNorm = 168.4861, GNorm = 0.2068, lr_0 = 2.8493e-04
Loss = 6.0605e-03, PNorm = 168.4962, GNorm = 0.1077, lr_0 = 2.8473e-04
Loss = 5.1128e-03, PNorm = 168.5076, GNorm = 0.2909, lr_0 = 2.8454e-04
Loss = 3.9390e-03, PNorm = 168.5167, GNorm = 0.0955, lr_0 = 2.8434e-04
Loss = 4.2675e-03, PNorm = 168.5252, GNorm = 0.0751, lr_0 = 2.8415e-04
Loss = 5.6173e-03, PNorm = 168.5351, GNorm = 0.5685, lr_0 = 2.8395e-04
Loss = 3.7837e-03, PNorm = 168.5415, GNorm = 0.1886, lr_0 = 2.8376e-04
Loss = 3.5215e-03, PNorm = 168.5520, GNorm = 0.2972, lr_0 = 2.8356e-04
Loss = 3.6414e-03, PNorm = 168.5598, GNorm = 0.1429, lr_0 = 2.8337e-04
Loss = 4.0921e-03, PNorm = 168.5661, GNorm = 0.1322, lr_0 = 2.8317e-04
Loss = 4.5547e-03, PNorm = 168.5745, GNorm = 0.1347, lr_0 = 2.8298e-04
Loss = 4.7508e-03, PNorm = 168.5835, GNorm = 0.2555, lr_0 = 2.8279e-04
Loss = 8.0767e-03, PNorm = 168.5947, GNorm = 0.4653, lr_0 = 2.8259e-04
Loss = 3.3533e-03, PNorm = 168.6072, GNorm = 0.2017, lr_0 = 2.8240e-04
Loss = 4.0476e-03, PNorm = 168.6156, GNorm = 0.1902, lr_0 = 2.8221e-04
Loss = 5.9879e-03, PNorm = 168.6223, GNorm = 0.2611, lr_0 = 2.8201e-04
Loss = 4.2612e-03, PNorm = 168.6323, GNorm = 0.3118, lr_0 = 2.8182e-04
Loss = 4.3822e-03, PNorm = 168.6430, GNorm = 0.2681, lr_0 = 2.8163e-04
Loss = 4.5740e-03, PNorm = 168.6548, GNorm = 0.2051, lr_0 = 2.8143e-04
Loss = 4.4707e-03, PNorm = 168.6625, GNorm = 0.0868, lr_0 = 2.8124e-04
Loss = 4.4337e-03, PNorm = 168.6719, GNorm = 0.1090, lr_0 = 2.8105e-04
Loss = 4.8441e-03, PNorm = 168.6796, GNorm = 0.3477, lr_0 = 2.8085e-04
Loss = 3.4303e-03, PNorm = 168.6889, GNorm = 0.2566, lr_0 = 2.8066e-04
Loss = 5.2407e-03, PNorm = 168.6979, GNorm = 0.0812, lr_0 = 2.8047e-04
Loss = 4.4197e-03, PNorm = 168.7055, GNorm = 0.1391, lr_0 = 2.8028e-04
Loss = 5.6363e-03, PNorm = 168.7132, GNorm = 0.1270, lr_0 = 2.8009e-04
Loss = 4.6423e-03, PNorm = 168.7234, GNorm = 0.1499, lr_0 = 2.7989e-04
Loss = 3.9062e-03, PNorm = 168.7309, GNorm = 0.1970, lr_0 = 2.7970e-04
Loss = 5.6774e-03, PNorm = 168.7420, GNorm = 0.0826, lr_0 = 2.7951e-04
Loss = 3.9346e-03, PNorm = 168.7534, GNorm = 0.1215, lr_0 = 2.7932e-04
Loss = 3.7634e-03, PNorm = 168.7642, GNorm = 0.2131, lr_0 = 2.7913e-04
Loss = 4.7100e-03, PNorm = 168.7718, GNorm = 0.2302, lr_0 = 2.7894e-04
Loss = 6.6385e-03, PNorm = 168.7802, GNorm = 0.1853, lr_0 = 2.7875e-04
Loss = 3.9392e-03, PNorm = 168.7919, GNorm = 0.2499, lr_0 = 2.7855e-04
Loss = 4.9190e-03, PNorm = 168.8011, GNorm = 0.1176, lr_0 = 2.7836e-04
Loss = 5.5574e-03, PNorm = 168.8095, GNorm = 0.3671, lr_0 = 2.7817e-04
Loss = 4.5669e-03, PNorm = 168.8175, GNorm = 0.2258, lr_0 = 2.7798e-04
Loss = 5.7161e-03, PNorm = 168.8286, GNorm = 0.1221, lr_0 = 2.7779e-04
Loss = 4.4748e-03, PNorm = 168.8368, GNorm = 0.2949, lr_0 = 2.7760e-04
Loss = 3.6704e-03, PNorm = 168.8456, GNorm = 0.2461, lr_0 = 2.7741e-04
Loss = 3.7718e-03, PNorm = 168.8529, GNorm = 0.1331, lr_0 = 2.7722e-04
Loss = 3.5355e-03, PNorm = 168.8631, GNorm = 0.1228, lr_0 = 2.7703e-04
Loss = 4.4469e-03, PNorm = 168.8707, GNorm = 0.1331, lr_0 = 2.7684e-04
Loss = 4.1919e-03, PNorm = 168.8806, GNorm = 0.0882, lr_0 = 2.7665e-04
Loss = 5.3025e-03, PNorm = 168.8885, GNorm = 0.1316, lr_0 = 2.7646e-04
Loss = 4.9970e-03, PNorm = 168.8970, GNorm = 0.1772, lr_0 = 2.7627e-04
Loss = 5.2101e-03, PNorm = 168.9070, GNorm = 0.2597, lr_0 = 2.7608e-04
Loss = 4.1282e-03, PNorm = 168.9155, GNorm = 0.1233, lr_0 = 2.7590e-04
Loss = 5.1665e-03, PNorm = 168.9235, GNorm = 0.1071, lr_0 = 2.7571e-04
Loss = 4.1826e-03, PNorm = 168.9348, GNorm = 0.1323, lr_0 = 2.7552e-04
Loss = 3.9962e-03, PNorm = 168.9450, GNorm = 0.1331, lr_0 = 2.7533e-04
Loss = 5.2737e-03, PNorm = 168.9561, GNorm = 0.1410, lr_0 = 2.7514e-04
Loss = 4.2416e-03, PNorm = 168.9677, GNorm = 0.3022, lr_0 = 2.7495e-04
Loss = 3.6954e-03, PNorm = 168.9774, GNorm = 0.0809, lr_0 = 2.7476e-04
Loss = 4.2446e-03, PNorm = 168.9851, GNorm = 0.7077, lr_0 = 2.7457e-04
Loss = 4.2314e-03, PNorm = 168.9927, GNorm = 0.1914, lr_0 = 2.7439e-04
Loss = 3.0028e-03, PNorm = 168.9996, GNorm = 0.1640, lr_0 = 2.7420e-04
Loss = 3.3708e-03, PNorm = 169.0082, GNorm = 0.2172, lr_0 = 2.7401e-04
Loss = 3.4674e-03, PNorm = 169.0177, GNorm = 0.2261, lr_0 = 2.7382e-04
Loss = 3.9428e-03, PNorm = 169.0267, GNorm = 0.1015, lr_0 = 2.7364e-04
Loss = 4.7210e-03, PNorm = 169.0364, GNorm = 0.1497, lr_0 = 2.7345e-04
Loss = 4.2649e-03, PNorm = 169.0432, GNorm = 0.0577, lr_0 = 2.7326e-04
Loss = 4.0676e-03, PNorm = 169.0496, GNorm = 0.2831, lr_0 = 2.7307e-04
Loss = 4.7133e-03, PNorm = 169.0571, GNorm = 0.1073, lr_0 = 2.7289e-04
Loss = 5.2354e-03, PNorm = 169.0668, GNorm = 0.0960, lr_0 = 2.7270e-04
Loss = 3.5196e-03, PNorm = 169.0787, GNorm = 0.1730, lr_0 = 2.7251e-04
Loss = 3.4113e-03, PNorm = 169.0893, GNorm = 0.1612, lr_0 = 2.7233e-04
Loss = 3.6095e-03, PNorm = 169.0993, GNorm = 0.1952, lr_0 = 2.7214e-04
Loss = 3.5577e-03, PNorm = 169.1073, GNorm = 0.0747, lr_0 = 2.7195e-04
Loss = 7.9904e-03, PNorm = 169.1152, GNorm = 0.1186, lr_0 = 2.7177e-04
Loss = 3.2877e-03, PNorm = 169.1240, GNorm = 0.3333, lr_0 = 2.7158e-04
Loss = 4.3494e-03, PNorm = 169.1352, GNorm = 0.0953, lr_0 = 2.7139e-04
Loss = 5.9705e-03, PNorm = 169.1473, GNorm = 0.2533, lr_0 = 2.7121e-04
Loss = 4.4179e-03, PNorm = 169.1589, GNorm = 0.4402, lr_0 = 2.7102e-04
Loss = 9.0503e-03, PNorm = 169.1681, GNorm = 0.1628, lr_0 = 2.7084e-04
Loss = 3.4861e-03, PNorm = 169.1800, GNorm = 0.1645, lr_0 = 2.7065e-04
Loss = 4.9477e-03, PNorm = 169.1910, GNorm = 0.3214, lr_0 = 2.7047e-04
Loss = 5.9391e-03, PNorm = 169.2046, GNorm = 0.2810, lr_0 = 2.7028e-04
Loss = 4.7236e-03, PNorm = 169.2129, GNorm = 0.0817, lr_0 = 2.7010e-04
Loss = 4.0389e-03, PNorm = 169.2218, GNorm = 0.1063, lr_0 = 2.6991e-04
Loss = 7.9641e-03, PNorm = 169.2272, GNorm = 0.1883, lr_0 = 2.6973e-04
Loss = 5.2465e-03, PNorm = 169.2366, GNorm = 0.3099, lr_0 = 2.6954e-04
Loss = 4.5469e-03, PNorm = 169.2487, GNorm = 0.3067, lr_0 = 2.6936e-04
Loss = 4.5966e-03, PNorm = 169.2629, GNorm = 0.0524, lr_0 = 2.6917e-04
Loss = 4.3250e-03, PNorm = 169.2723, GNorm = 0.2087, lr_0 = 2.6899e-04
Loss = 4.9820e-03, PNorm = 169.2846, GNorm = 0.2953, lr_0 = 2.6880e-04
Loss = 4.5622e-03, PNorm = 169.2981, GNorm = 0.3025, lr_0 = 2.6862e-04
Loss = 4.5694e-03, PNorm = 169.3055, GNorm = 0.1517, lr_0 = 2.6844e-04
Loss = 4.0442e-03, PNorm = 169.3127, GNorm = 0.1490, lr_0 = 2.6825e-04
Validation mae = 0.279701
Epoch 18
Loss = 4.6987e-03, PNorm = 169.3217, GNorm = 0.4805, lr_0 = 2.6807e-04
Loss = 3.5395e-03, PNorm = 169.3276, GNorm = 0.2501, lr_0 = 2.6788e-04
Loss = 3.5022e-03, PNorm = 169.3333, GNorm = 0.0641, lr_0 = 2.6770e-04
Loss = 4.2105e-03, PNorm = 169.3394, GNorm = 0.3432, lr_0 = 2.6752e-04
Loss = 3.6759e-03, PNorm = 169.3464, GNorm = 0.1476, lr_0 = 2.6733e-04
Loss = 3.1468e-03, PNorm = 169.3545, GNorm = 0.0689, lr_0 = 2.6715e-04
Loss = 3.6256e-03, PNorm = 169.3600, GNorm = 0.2080, lr_0 = 2.6697e-04
Loss = 3.3950e-03, PNorm = 169.3674, GNorm = 0.0784, lr_0 = 2.6678e-04
Loss = 3.2830e-03, PNorm = 169.3735, GNorm = 0.4771, lr_0 = 2.6660e-04
Loss = 3.6261e-03, PNorm = 169.3810, GNorm = 0.4645, lr_0 = 2.6642e-04
Loss = 4.2861e-03, PNorm = 169.3879, GNorm = 0.0941, lr_0 = 2.6624e-04
Loss = 3.6247e-03, PNorm = 169.3957, GNorm = 0.1896, lr_0 = 2.6605e-04
Loss = 4.4803e-03, PNorm = 169.4021, GNorm = 0.0784, lr_0 = 2.6587e-04
Loss = 4.1059e-03, PNorm = 169.4086, GNorm = 0.2776, lr_0 = 2.6569e-04
Loss = 3.1503e-03, PNorm = 169.4132, GNorm = 0.1731, lr_0 = 2.6551e-04
Loss = 4.2565e-03, PNorm = 169.4178, GNorm = 0.2102, lr_0 = 2.6533e-04
Loss = 4.7031e-03, PNorm = 169.4252, GNorm = 0.2859, lr_0 = 2.6514e-04
Loss = 3.9402e-03, PNorm = 169.4335, GNorm = 0.0573, lr_0 = 2.6496e-04
Loss = 3.4888e-03, PNorm = 169.4428, GNorm = 0.0782, lr_0 = 2.6478e-04
Loss = 3.2835e-03, PNorm = 169.4507, GNorm = 0.2122, lr_0 = 2.6460e-04
Loss = 4.1199e-03, PNorm = 169.4602, GNorm = 0.2876, lr_0 = 2.6442e-04
Loss = 4.7660e-03, PNorm = 169.4654, GNorm = 0.2688, lr_0 = 2.6424e-04
Loss = 3.9442e-03, PNorm = 169.4741, GNorm = 0.2525, lr_0 = 2.6406e-04
Loss = 4.1140e-03, PNorm = 169.4827, GNorm = 0.2023, lr_0 = 2.6388e-04
Loss = 3.6370e-03, PNorm = 169.4898, GNorm = 0.1239, lr_0 = 2.6369e-04
Loss = 3.8930e-03, PNorm = 169.4962, GNorm = 0.1613, lr_0 = 2.6351e-04
Loss = 3.6750e-03, PNorm = 169.5026, GNorm = 0.0715, lr_0 = 2.6333e-04
Loss = 3.3858e-03, PNorm = 169.5093, GNorm = 0.0952, lr_0 = 2.6315e-04
Loss = 4.0896e-03, PNorm = 169.5157, GNorm = 0.1974, lr_0 = 2.6297e-04
Loss = 3.3764e-03, PNorm = 169.5237, GNorm = 0.0775, lr_0 = 2.6279e-04
Loss = 3.5623e-03, PNorm = 169.5312, GNorm = 0.1323, lr_0 = 2.6261e-04
Loss = 3.0343e-03, PNorm = 169.5382, GNorm = 0.2382, lr_0 = 2.6243e-04
Loss = 6.4903e-03, PNorm = 169.5449, GNorm = 0.2891, lr_0 = 2.6225e-04
Loss = 3.0361e-03, PNorm = 169.5521, GNorm = 0.1556, lr_0 = 2.6207e-04
Loss = 3.6443e-03, PNorm = 169.5572, GNorm = 0.1650, lr_0 = 2.6189e-04
Loss = 3.3511e-03, PNorm = 169.5614, GNorm = 0.0904, lr_0 = 2.6171e-04
Loss = 3.2897e-03, PNorm = 169.5679, GNorm = 0.1285, lr_0 = 2.6153e-04
Loss = 4.4277e-03, PNorm = 169.5761, GNorm = 0.2351, lr_0 = 2.6136e-04
Loss = 3.8383e-03, PNorm = 169.5852, GNorm = 0.1536, lr_0 = 2.6118e-04
Loss = 2.6794e-03, PNorm = 169.5933, GNorm = 0.1126, lr_0 = 2.6100e-04
Loss = 5.1339e-03, PNorm = 169.6014, GNorm = 0.1260, lr_0 = 2.6082e-04
Loss = 3.0806e-03, PNorm = 169.6073, GNorm = 0.4759, lr_0 = 2.6064e-04
Loss = 3.9103e-03, PNorm = 169.6161, GNorm = 0.1660, lr_0 = 2.6046e-04
Loss = 3.8640e-03, PNorm = 169.6240, GNorm = 0.2781, lr_0 = 2.6028e-04
Loss = 4.1017e-03, PNorm = 169.6325, GNorm = 0.1972, lr_0 = 2.6011e-04
Loss = 3.6829e-03, PNorm = 169.6377, GNorm = 0.2494, lr_0 = 2.5993e-04
Loss = 5.6164e-03, PNorm = 169.6452, GNorm = 0.1243, lr_0 = 2.5975e-04
Loss = 3.7151e-03, PNorm = 169.6543, GNorm = 0.2613, lr_0 = 2.5957e-04
Loss = 3.4807e-03, PNorm = 169.6588, GNorm = 0.0870, lr_0 = 2.5939e-04
Loss = 3.8685e-03, PNorm = 169.6659, GNorm = 0.1422, lr_0 = 2.5922e-04
Loss = 2.7290e-03, PNorm = 169.6747, GNorm = 0.0913, lr_0 = 2.5904e-04
Loss = 3.3502e-03, PNorm = 169.6845, GNorm = 0.1762, lr_0 = 2.5886e-04
Loss = 4.1762e-03, PNorm = 169.6909, GNorm = 0.1297, lr_0 = 2.5868e-04
Loss = 4.3693e-03, PNorm = 169.6986, GNorm = 0.2623, lr_0 = 2.5851e-04
Loss = 3.5216e-03, PNorm = 169.7068, GNorm = 0.1950, lr_0 = 2.5833e-04
Loss = 3.7051e-03, PNorm = 169.7155, GNorm = 0.0758, lr_0 = 2.5815e-04
Loss = 4.0458e-03, PNorm = 169.7222, GNorm = 0.2255, lr_0 = 2.5797e-04
Loss = 3.5299e-03, PNorm = 169.7257, GNorm = 0.2464, lr_0 = 2.5780e-04
Loss = 8.0362e-03, PNorm = 169.7308, GNorm = 0.1539, lr_0 = 2.5762e-04
Loss = 4.9805e-03, PNorm = 169.7359, GNorm = 0.1715, lr_0 = 2.5745e-04
Loss = 4.4811e-03, PNorm = 169.7471, GNorm = 0.1551, lr_0 = 2.5727e-04
Loss = 4.8948e-03, PNorm = 169.7567, GNorm = 0.2352, lr_0 = 2.5709e-04
Loss = 4.1615e-03, PNorm = 169.7670, GNorm = 0.1109, lr_0 = 2.5692e-04
Loss = 3.5441e-03, PNorm = 169.7754, GNorm = 0.1121, lr_0 = 2.5674e-04
Loss = 3.6441e-03, PNorm = 169.7836, GNorm = 0.1646, lr_0 = 2.5656e-04
Loss = 3.4667e-03, PNorm = 169.7913, GNorm = 0.1692, lr_0 = 2.5639e-04
Loss = 6.8213e-03, PNorm = 169.7980, GNorm = 0.3426, lr_0 = 2.5621e-04
Loss = 4.4658e-03, PNorm = 169.8048, GNorm = 0.1542, lr_0 = 2.5604e-04
Loss = 3.2120e-03, PNorm = 169.8111, GNorm = 0.1800, lr_0 = 2.5586e-04
Loss = 4.2955e-03, PNorm = 169.8190, GNorm = 0.2249, lr_0 = 2.5569e-04
Loss = 3.3739e-03, PNorm = 169.8257, GNorm = 0.0762, lr_0 = 2.5551e-04
Loss = 3.0008e-03, PNorm = 169.8349, GNorm = 0.0615, lr_0 = 2.5534e-04
Loss = 5.1669e-03, PNorm = 169.8437, GNorm = 0.1199, lr_0 = 2.5516e-04
Loss = 3.3904e-03, PNorm = 169.8519, GNorm = 0.0694, lr_0 = 2.5499e-04
Loss = 6.5117e-03, PNorm = 169.8573, GNorm = 0.8705, lr_0 = 2.5481e-04
Loss = 3.3825e-03, PNorm = 169.8647, GNorm = 0.0941, lr_0 = 2.5464e-04
Loss = 5.1813e-03, PNorm = 169.8746, GNorm = 0.4766, lr_0 = 2.5446e-04
Loss = 3.3718e-03, PNorm = 169.8863, GNorm = 0.2722, lr_0 = 2.5429e-04
Loss = 5.1939e-03, PNorm = 169.8941, GNorm = 0.2869, lr_0 = 2.5411e-04
Loss = 3.4770e-03, PNorm = 169.9026, GNorm = 0.0975, lr_0 = 2.5394e-04
Loss = 3.2102e-03, PNorm = 169.9107, GNorm = 0.1873, lr_0 = 2.5377e-04
Loss = 3.9334e-03, PNorm = 169.9186, GNorm = 0.1888, lr_0 = 2.5359e-04
Loss = 3.3865e-03, PNorm = 169.9267, GNorm = 0.1290, lr_0 = 2.5342e-04
Loss = 4.4986e-03, PNorm = 169.9345, GNorm = 0.1335, lr_0 = 2.5325e-04
Loss = 3.7520e-03, PNorm = 169.9385, GNorm = 0.1832, lr_0 = 2.5307e-04
Loss = 3.8994e-03, PNorm = 169.9430, GNorm = 0.1195, lr_0 = 2.5290e-04
Loss = 4.1024e-03, PNorm = 169.9466, GNorm = 0.1658, lr_0 = 2.5273e-04
Loss = 3.6817e-03, PNorm = 169.9547, GNorm = 0.1952, lr_0 = 2.5255e-04
Loss = 3.4108e-03, PNorm = 169.9600, GNorm = 0.1582, lr_0 = 2.5238e-04
Loss = 2.6946e-03, PNorm = 169.9668, GNorm = 0.0977, lr_0 = 2.5221e-04
Loss = 5.1578e-03, PNorm = 169.9756, GNorm = 0.1767, lr_0 = 2.5203e-04
Loss = 3.8249e-03, PNorm = 169.9847, GNorm = 0.2433, lr_0 = 2.5186e-04
Loss = 3.3885e-03, PNorm = 169.9936, GNorm = 0.2000, lr_0 = 2.5169e-04
Loss = 5.1547e-03, PNorm = 170.0009, GNorm = 0.3713, lr_0 = 2.5152e-04
Loss = 4.4204e-03, PNorm = 170.0088, GNorm = 0.1755, lr_0 = 2.5134e-04
Loss = 3.4477e-03, PNorm = 170.0170, GNorm = 0.0917, lr_0 = 2.5117e-04
Loss = 2.8215e-03, PNorm = 170.0255, GNorm = 0.1238, lr_0 = 2.5100e-04
Loss = 2.9823e-03, PNorm = 170.0335, GNorm = 0.1876, lr_0 = 2.5083e-04
Loss = 3.8203e-03, PNorm = 170.0401, GNorm = 0.2510, lr_0 = 2.5066e-04
Loss = 4.4852e-03, PNorm = 170.0489, GNorm = 0.0935, lr_0 = 2.5048e-04
Loss = 3.9450e-03, PNorm = 170.0553, GNorm = 0.1557, lr_0 = 2.5031e-04
Loss = 3.3570e-03, PNorm = 170.0640, GNorm = 0.1891, lr_0 = 2.5014e-04
Loss = 4.0299e-03, PNorm = 170.0722, GNorm = 0.1446, lr_0 = 2.4997e-04
Loss = 4.3666e-03, PNorm = 170.0783, GNorm = 0.0730, lr_0 = 2.4980e-04
Loss = 3.4421e-03, PNorm = 170.0867, GNorm = 0.1001, lr_0 = 2.4963e-04
Loss = 4.4990e-03, PNorm = 170.0949, GNorm = 0.0912, lr_0 = 2.4946e-04
Loss = 3.7509e-03, PNorm = 170.1047, GNorm = 0.0684, lr_0 = 2.4929e-04
Loss = 3.2007e-03, PNorm = 170.1122, GNorm = 0.3547, lr_0 = 2.4911e-04
Loss = 3.4339e-03, PNorm = 170.1195, GNorm = 0.0483, lr_0 = 2.4894e-04
Loss = 4.4630e-03, PNorm = 170.1275, GNorm = 0.2300, lr_0 = 2.4877e-04
Loss = 3.9572e-03, PNorm = 170.1348, GNorm = 0.0874, lr_0 = 2.4860e-04
Loss = 3.6310e-03, PNorm = 170.1422, GNorm = 0.1570, lr_0 = 2.4843e-04
Loss = 4.2884e-03, PNorm = 170.1514, GNorm = 0.2507, lr_0 = 2.4826e-04
Loss = 5.0595e-03, PNorm = 170.1581, GNorm = 0.3898, lr_0 = 2.4809e-04
Loss = 7.2090e-03, PNorm = 170.1688, GNorm = 0.1817, lr_0 = 2.4792e-04
Loss = 4.3171e-03, PNorm = 170.1784, GNorm = 0.3188, lr_0 = 2.4775e-04
Loss = 5.4559e-03, PNorm = 170.1871, GNorm = 0.7162, lr_0 = 2.4758e-04
Loss = 3.4195e-03, PNorm = 170.1965, GNorm = 0.0968, lr_0 = 2.4741e-04
Loss = 3.4749e-03, PNorm = 170.2047, GNorm = 0.1608, lr_0 = 2.4724e-04
Loss = 4.9467e-03, PNorm = 170.2117, GNorm = 0.2334, lr_0 = 2.4707e-04
Validation mae = 0.278783
Epoch 19
Loss = 4.7180e-03, PNorm = 170.2151, GNorm = 0.0927, lr_0 = 2.4690e-04
Loss = 2.9139e-03, PNorm = 170.2175, GNorm = 0.1245, lr_0 = 2.4674e-04
Loss = 3.7343e-03, PNorm = 170.2230, GNorm = 0.1870, lr_0 = 2.4657e-04
Loss = 4.6690e-03, PNorm = 170.2307, GNorm = 0.1432, lr_0 = 2.4640e-04
Loss = 3.0750e-03, PNorm = 170.2389, GNorm = 0.3358, lr_0 = 2.4623e-04
Loss = 3.2138e-03, PNorm = 170.2474, GNorm = 0.3298, lr_0 = 2.4606e-04
Loss = 4.0742e-03, PNorm = 170.2547, GNorm = 0.1609, lr_0 = 2.4589e-04
Loss = 3.1884e-03, PNorm = 170.2601, GNorm = 0.2376, lr_0 = 2.4572e-04
Loss = 3.0467e-03, PNorm = 170.2677, GNorm = 0.1124, lr_0 = 2.4556e-04
Loss = 3.1219e-03, PNorm = 170.2730, GNorm = 0.0853, lr_0 = 2.4539e-04
Loss = 3.2461e-03, PNorm = 170.2789, GNorm = 0.1577, lr_0 = 2.4522e-04
Loss = 4.0910e-03, PNorm = 170.2868, GNorm = 0.0569, lr_0 = 2.4505e-04
Loss = 3.0583e-03, PNorm = 170.2958, GNorm = 0.1358, lr_0 = 2.4488e-04
Loss = 2.7204e-03, PNorm = 170.3031, GNorm = 0.0674, lr_0 = 2.4472e-04
Loss = 3.2792e-03, PNorm = 170.3113, GNorm = 0.2207, lr_0 = 2.4455e-04
Loss = 2.6186e-03, PNorm = 170.3161, GNorm = 0.1349, lr_0 = 2.4438e-04
Loss = 3.2457e-03, PNorm = 170.3196, GNorm = 0.1393, lr_0 = 2.4421e-04
Loss = 3.0915e-03, PNorm = 170.3247, GNorm = 0.0653, lr_0 = 2.4405e-04
Loss = 4.1481e-03, PNorm = 170.3278, GNorm = 0.3399, lr_0 = 2.4388e-04
Loss = 3.7557e-03, PNorm = 170.3324, GNorm = 0.0845, lr_0 = 2.4371e-04
Loss = 2.5172e-03, PNorm = 170.3399, GNorm = 0.0866, lr_0 = 2.4354e-04
Loss = 3.3083e-03, PNorm = 170.3474, GNorm = 0.4650, lr_0 = 2.4338e-04
Loss = 3.3594e-03, PNorm = 170.3534, GNorm = 0.2309, lr_0 = 2.4321e-04
Loss = 3.5420e-03, PNorm = 170.3579, GNorm = 0.1329, lr_0 = 2.4304e-04
Loss = 3.0852e-03, PNorm = 170.3628, GNorm = 0.2390, lr_0 = 2.4288e-04
Loss = 3.2399e-03, PNorm = 170.3713, GNorm = 0.1041, lr_0 = 2.4271e-04
Loss = 2.5859e-03, PNorm = 170.3767, GNorm = 0.1956, lr_0 = 2.4254e-04
Loss = 3.1876e-03, PNorm = 170.3819, GNorm = 0.1665, lr_0 = 2.4238e-04
Loss = 3.2296e-03, PNorm = 170.3877, GNorm = 0.1172, lr_0 = 2.4221e-04
Loss = 4.5138e-03, PNorm = 170.3935, GNorm = 0.1762, lr_0 = 2.4205e-04
Loss = 3.6677e-03, PNorm = 170.4015, GNorm = 0.2705, lr_0 = 2.4188e-04
Loss = 2.6423e-03, PNorm = 170.4096, GNorm = 0.0796, lr_0 = 2.4171e-04
Loss = 3.6097e-03, PNorm = 170.4144, GNorm = 0.1710, lr_0 = 2.4155e-04
Loss = 4.3384e-03, PNorm = 170.4189, GNorm = 0.1340, lr_0 = 2.4138e-04
Loss = 3.5833e-03, PNorm = 170.4236, GNorm = 0.2071, lr_0 = 2.4122e-04
Loss = 4.5498e-03, PNorm = 170.4315, GNorm = 0.3508, lr_0 = 2.4105e-04
Loss = 2.4425e-03, PNorm = 170.4378, GNorm = 0.1227, lr_0 = 2.4089e-04
Loss = 2.9836e-03, PNorm = 170.4428, GNorm = 0.0954, lr_0 = 2.4072e-04
Loss = 6.2617e-03, PNorm = 170.4515, GNorm = 0.3888, lr_0 = 2.4056e-04
Loss = 3.9915e-03, PNorm = 170.4572, GNorm = 0.1124, lr_0 = 2.4039e-04
Loss = 2.7579e-03, PNorm = 170.4645, GNorm = 0.2815, lr_0 = 2.4023e-04
Loss = 3.2517e-03, PNorm = 170.4696, GNorm = 0.2913, lr_0 = 2.4006e-04
Loss = 2.7437e-03, PNorm = 170.4743, GNorm = 0.0827, lr_0 = 2.3990e-04
Loss = 3.2300e-03, PNorm = 170.4807, GNorm = 0.2224, lr_0 = 2.3974e-04
Loss = 3.7335e-03, PNorm = 170.4884, GNorm = 0.2131, lr_0 = 2.3957e-04
Loss = 4.0813e-03, PNorm = 170.4956, GNorm = 0.1356, lr_0 = 2.3941e-04
Loss = 3.7582e-03, PNorm = 170.5012, GNorm = 0.4434, lr_0 = 2.3924e-04
Loss = 3.5458e-03, PNorm = 170.5051, GNorm = 0.2052, lr_0 = 2.3908e-04
Loss = 3.6478e-03, PNorm = 170.5120, GNorm = 0.0883, lr_0 = 2.3892e-04
Loss = 2.8722e-03, PNorm = 170.5183, GNorm = 0.1095, lr_0 = 2.3875e-04
Loss = 3.3504e-03, PNorm = 170.5242, GNorm = 0.1149, lr_0 = 2.3859e-04
Loss = 2.5475e-03, PNorm = 170.5300, GNorm = 0.2256, lr_0 = 2.3842e-04
Loss = 3.3003e-03, PNorm = 170.5353, GNorm = 0.0963, lr_0 = 2.3826e-04
Loss = 5.0144e-03, PNorm = 170.5404, GNorm = 0.2784, lr_0 = 2.3810e-04
Loss = 3.4828e-03, PNorm = 170.5440, GNorm = 0.2228, lr_0 = 2.3794e-04
Loss = 4.0036e-03, PNorm = 170.5501, GNorm = 0.1655, lr_0 = 2.3777e-04
Loss = 3.3042e-03, PNorm = 170.5580, GNorm = 0.1363, lr_0 = 2.3761e-04
Loss = 3.6968e-03, PNorm = 170.5665, GNorm = 0.2425, lr_0 = 2.3745e-04
Loss = 5.4454e-03, PNorm = 170.5717, GNorm = 0.1313, lr_0 = 2.3728e-04
Loss = 3.0189e-03, PNorm = 170.5768, GNorm = 0.1358, lr_0 = 2.3712e-04
Loss = 3.0952e-03, PNorm = 170.5799, GNorm = 0.1993, lr_0 = 2.3696e-04
Loss = 2.6699e-03, PNorm = 170.5876, GNorm = 0.0876, lr_0 = 2.3680e-04
Loss = 2.9820e-03, PNorm = 170.5951, GNorm = 0.2711, lr_0 = 2.3663e-04
Loss = 2.6413e-03, PNorm = 170.5997, GNorm = 0.1618, lr_0 = 2.3647e-04
Loss = 5.2905e-03, PNorm = 170.6042, GNorm = 0.1895, lr_0 = 2.3631e-04
Loss = 3.1285e-03, PNorm = 170.6103, GNorm = 0.3304, lr_0 = 2.3615e-04
Loss = 5.2130e-03, PNorm = 170.6168, GNorm = 0.3150, lr_0 = 2.3599e-04
Loss = 3.3949e-03, PNorm = 170.6226, GNorm = 0.0766, lr_0 = 2.3582e-04
Loss = 3.1362e-03, PNorm = 170.6292, GNorm = 0.0721, lr_0 = 2.3566e-04
Loss = 2.3376e-03, PNorm = 170.6367, GNorm = 0.0614, lr_0 = 2.3550e-04
Loss = 2.8844e-03, PNorm = 170.6432, GNorm = 0.2033, lr_0 = 2.3534e-04
Loss = 4.1313e-03, PNorm = 170.6495, GNorm = 0.0846, lr_0 = 2.3518e-04
Loss = 3.5384e-03, PNorm = 170.6564, GNorm = 0.0803, lr_0 = 2.3502e-04
Loss = 3.4324e-03, PNorm = 170.6653, GNorm = 0.1477, lr_0 = 2.3486e-04
Loss = 2.5792e-03, PNorm = 170.6722, GNorm = 0.1725, lr_0 = 2.3470e-04
Loss = 4.8070e-03, PNorm = 170.6800, GNorm = 0.1645, lr_0 = 2.3454e-04
Loss = 2.4468e-03, PNorm = 170.6869, GNorm = 0.1132, lr_0 = 2.3437e-04
Loss = 3.0322e-03, PNorm = 170.6928, GNorm = 0.1801, lr_0 = 2.3421e-04
Loss = 2.6236e-03, PNorm = 170.6976, GNorm = 0.1693, lr_0 = 2.3405e-04
Loss = 4.1280e-03, PNorm = 170.7026, GNorm = 0.6221, lr_0 = 2.3389e-04
Loss = 2.8662e-03, PNorm = 170.7089, GNorm = 0.1019, lr_0 = 2.3373e-04
Loss = 5.3329e-03, PNorm = 170.7161, GNorm = 0.1432, lr_0 = 2.3357e-04
Loss = 2.8476e-03, PNorm = 170.7233, GNorm = 0.2263, lr_0 = 2.3341e-04
Loss = 2.8814e-03, PNorm = 170.7288, GNorm = 0.3265, lr_0 = 2.3325e-04
Loss = 3.9048e-03, PNorm = 170.7363, GNorm = 0.0774, lr_0 = 2.3309e-04
Loss = 3.1042e-03, PNorm = 170.7422, GNorm = 0.1312, lr_0 = 2.3293e-04
Loss = 2.9311e-03, PNorm = 170.7484, GNorm = 0.1509, lr_0 = 2.3277e-04
Loss = 3.0869e-03, PNorm = 170.7542, GNorm = 0.1455, lr_0 = 2.3261e-04
Loss = 3.1916e-03, PNorm = 170.7564, GNorm = 0.1268, lr_0 = 2.3246e-04
Loss = 4.2053e-03, PNorm = 170.7633, GNorm = 0.2694, lr_0 = 2.3230e-04
Loss = 3.1504e-03, PNorm = 170.7701, GNorm = 0.4122, lr_0 = 2.3214e-04
Loss = 2.8776e-03, PNorm = 170.7786, GNorm = 0.0787, lr_0 = 2.3198e-04
Loss = 3.0094e-03, PNorm = 170.7862, GNorm = 0.0654, lr_0 = 2.3182e-04
Loss = 5.5428e-03, PNorm = 170.7918, GNorm = 0.1742, lr_0 = 2.3166e-04
Loss = 3.7478e-03, PNorm = 170.7983, GNorm = 0.3310, lr_0 = 2.3150e-04
Loss = 3.2159e-03, PNorm = 170.8048, GNorm = 0.0614, lr_0 = 2.3134e-04
Loss = 5.5853e-03, PNorm = 170.8120, GNorm = 0.1688, lr_0 = 2.3118e-04
Loss = 2.6329e-03, PNorm = 170.8186, GNorm = 0.1693, lr_0 = 2.3103e-04
Loss = 2.8190e-03, PNorm = 170.8232, GNorm = 0.1785, lr_0 = 2.3087e-04
Loss = 3.0690e-03, PNorm = 170.8291, GNorm = 0.1195, lr_0 = 2.3071e-04
Loss = 3.0010e-03, PNorm = 170.8366, GNorm = 0.2406, lr_0 = 2.3055e-04
Loss = 4.3655e-03, PNorm = 170.8437, GNorm = 0.4217, lr_0 = 2.3039e-04
Loss = 5.9464e-03, PNorm = 170.8528, GNorm = 0.3227, lr_0 = 2.3024e-04
Loss = 4.2660e-03, PNorm = 170.8574, GNorm = 0.2631, lr_0 = 2.3008e-04
Loss = 3.2024e-03, PNorm = 170.8640, GNorm = 0.1637, lr_0 = 2.2992e-04
Loss = 3.4033e-03, PNorm = 170.8696, GNorm = 0.2011, lr_0 = 2.2976e-04
Loss = 2.8327e-03, PNorm = 170.8763, GNorm = 0.0742, lr_0 = 2.2961e-04
Loss = 3.2361e-03, PNorm = 170.8854, GNorm = 0.1317, lr_0 = 2.2945e-04
Loss = 3.9498e-03, PNorm = 170.8942, GNorm = 0.1525, lr_0 = 2.2929e-04
Loss = 2.8868e-03, PNorm = 170.9028, GNorm = 0.3416, lr_0 = 2.2913e-04
Loss = 4.4887e-03, PNorm = 170.9101, GNorm = 0.1900, lr_0 = 2.2898e-04
Loss = 6.3857e-03, PNorm = 170.9174, GNorm = 0.0498, lr_0 = 2.2882e-04
Loss = 4.1000e-03, PNorm = 170.9242, GNorm = 0.0742, lr_0 = 2.2866e-04
Loss = 3.1492e-03, PNorm = 170.9287, GNorm = 0.1010, lr_0 = 2.2851e-04
Loss = 3.2122e-03, PNorm = 170.9319, GNorm = 0.3248, lr_0 = 2.2835e-04
Loss = 2.7612e-03, PNorm = 170.9404, GNorm = 0.1658, lr_0 = 2.2819e-04
Loss = 3.2879e-03, PNorm = 170.9480, GNorm = 0.1813, lr_0 = 2.2804e-04
Loss = 3.7849e-03, PNorm = 170.9555, GNorm = 0.0677, lr_0 = 2.2788e-04
Loss = 5.4059e-03, PNorm = 170.9607, GNorm = 0.1716, lr_0 = 2.2773e-04
Loss = 4.3590e-03, PNorm = 170.9666, GNorm = 0.1744, lr_0 = 2.2757e-04
Validation mae = 0.278909
Epoch 20
Loss = 3.4658e-03, PNorm = 170.9693, GNorm = 0.2645, lr_0 = 2.2741e-04
Loss = 2.8565e-03, PNorm = 170.9730, GNorm = 0.1209, lr_0 = 2.2726e-04
Loss = 2.7146e-03, PNorm = 170.9763, GNorm = 0.1579, lr_0 = 2.2710e-04
Loss = 3.0747e-03, PNorm = 170.9819, GNorm = 0.0756, lr_0 = 2.2695e-04
Loss = 3.1080e-03, PNorm = 170.9867, GNorm = 0.1916, lr_0 = 2.2679e-04
Loss = 2.6843e-03, PNorm = 170.9910, GNorm = 0.0869, lr_0 = 2.2664e-04
Loss = 3.4923e-03, PNorm = 170.9968, GNorm = 0.0520, lr_0 = 2.2648e-04
Loss = 3.5198e-03, PNorm = 171.0004, GNorm = 0.1723, lr_0 = 2.2632e-04
Loss = 2.6593e-03, PNorm = 171.0047, GNorm = 0.3023, lr_0 = 2.2617e-04
Loss = 3.0888e-03, PNorm = 171.0103, GNorm = 0.2058, lr_0 = 2.2601e-04
Loss = 2.7983e-03, PNorm = 171.0134, GNorm = 0.1070, lr_0 = 2.2586e-04
Loss = 2.9625e-03, PNorm = 171.0187, GNorm = 0.1890, lr_0 = 2.2571e-04
Loss = 2.5047e-03, PNorm = 171.0255, GNorm = 0.2885, lr_0 = 2.2555e-04
Loss = 2.9404e-03, PNorm = 171.0321, GNorm = 0.1427, lr_0 = 2.2540e-04
Loss = 2.8216e-03, PNorm = 171.0379, GNorm = 0.1144, lr_0 = 2.2524e-04
Loss = 2.1567e-03, PNorm = 171.0407, GNorm = 0.1216, lr_0 = 2.2509e-04
Loss = 3.2255e-03, PNorm = 171.0453, GNorm = 0.2667, lr_0 = 2.2493e-04
Loss = 5.9908e-03, PNorm = 171.0507, GNorm = 0.4300, lr_0 = 2.2478e-04
Loss = 3.0967e-03, PNorm = 171.0557, GNorm = 0.1029, lr_0 = 2.2463e-04
Loss = 2.6809e-03, PNorm = 171.0605, GNorm = 0.3174, lr_0 = 2.2447e-04
Loss = 3.1117e-03, PNorm = 171.0663, GNorm = 0.1641, lr_0 = 2.2432e-04
Loss = 3.7205e-03, PNorm = 171.0721, GNorm = 0.2901, lr_0 = 2.2416e-04
Loss = 3.4254e-03, PNorm = 171.0784, GNorm = 0.3657, lr_0 = 2.2401e-04
Loss = 2.6694e-03, PNorm = 171.0814, GNorm = 0.0919, lr_0 = 2.2386e-04
Loss = 2.7005e-03, PNorm = 171.0852, GNorm = 0.0831, lr_0 = 2.2370e-04
Loss = 4.0582e-03, PNorm = 171.0888, GNorm = 0.2429, lr_0 = 2.2355e-04
Loss = 3.4695e-03, PNorm = 171.0959, GNorm = 0.2038, lr_0 = 2.2340e-04
Loss = 3.7246e-03, PNorm = 171.1028, GNorm = 0.1028, lr_0 = 2.2324e-04
Loss = 3.7366e-03, PNorm = 171.1099, GNorm = 0.6197, lr_0 = 2.2309e-04
Loss = 2.2886e-03, PNorm = 171.1159, GNorm = 0.1432, lr_0 = 2.2294e-04
Loss = 2.8051e-03, PNorm = 171.1195, GNorm = 0.0488, lr_0 = 2.2279e-04
Loss = 2.4147e-03, PNorm = 171.1239, GNorm = 0.1715, lr_0 = 2.2263e-04
Loss = 3.0278e-03, PNorm = 171.1246, GNorm = 0.2506, lr_0 = 2.2248e-04
Loss = 3.5940e-03, PNorm = 171.1281, GNorm = 0.1728, lr_0 = 2.2233e-04
Loss = 3.8618e-03, PNorm = 171.1328, GNorm = 0.1672, lr_0 = 2.2218e-04
Loss = 2.3242e-03, PNorm = 171.1382, GNorm = 0.1237, lr_0 = 2.2202e-04
Loss = 3.2684e-03, PNorm = 171.1423, GNorm = 0.2063, lr_0 = 2.2187e-04
Loss = 2.6913e-03, PNorm = 171.1474, GNorm = 0.1486, lr_0 = 2.2172e-04
Loss = 3.4527e-03, PNorm = 171.1515, GNorm = 0.2691, lr_0 = 2.2157e-04
Loss = 2.1196e-03, PNorm = 171.1566, GNorm = 0.1565, lr_0 = 2.2142e-04
Loss = 3.4522e-03, PNorm = 171.1616, GNorm = 0.1963, lr_0 = 2.2126e-04
Loss = 2.2325e-03, PNorm = 171.1648, GNorm = 0.0975, lr_0 = 2.2111e-04
Loss = 2.1550e-03, PNorm = 171.1697, GNorm = 0.1406, lr_0 = 2.2096e-04
Loss = 3.0449e-03, PNorm = 171.1730, GNorm = 0.3717, lr_0 = 2.2081e-04
Loss = 2.5760e-03, PNorm = 171.1784, GNorm = 0.3206, lr_0 = 2.2066e-04
Loss = 4.9425e-03, PNorm = 171.1830, GNorm = 0.1664, lr_0 = 2.2051e-04
Loss = 3.8038e-03, PNorm = 171.1873, GNorm = 0.0978, lr_0 = 2.2036e-04
Loss = 3.3977e-03, PNorm = 171.1929, GNorm = 0.1136, lr_0 = 2.2021e-04
Loss = 3.3608e-03, PNorm = 171.1974, GNorm = 0.0699, lr_0 = 2.2005e-04
Loss = 3.2772e-03, PNorm = 171.2053, GNorm = 0.2347, lr_0 = 2.1990e-04
Loss = 3.6422e-03, PNorm = 171.2106, GNorm = 0.1082, lr_0 = 2.1975e-04
Loss = 2.2181e-03, PNorm = 171.2158, GNorm = 0.0778, lr_0 = 2.1960e-04
Loss = 2.8707e-03, PNorm = 171.2203, GNorm = 0.1694, lr_0 = 2.1945e-04
Loss = 2.5085e-03, PNorm = 171.2239, GNorm = 0.2198, lr_0 = 2.1930e-04
Loss = 4.3668e-03, PNorm = 171.2278, GNorm = 0.1118, lr_0 = 2.1915e-04
Loss = 3.9120e-03, PNorm = 171.2353, GNorm = 0.1736, lr_0 = 2.1900e-04
Loss = 3.0437e-03, PNorm = 171.2427, GNorm = 0.2346, lr_0 = 2.1885e-04
Loss = 2.1799e-03, PNorm = 171.2498, GNorm = 0.0795, lr_0 = 2.1870e-04
Loss = 2.0130e-03, PNorm = 171.2544, GNorm = 0.0509, lr_0 = 2.1855e-04
Loss = 3.0289e-03, PNorm = 171.2581, GNorm = 0.1102, lr_0 = 2.1840e-04
Loss = 2.4237e-03, PNorm = 171.2649, GNorm = 0.1489, lr_0 = 2.1825e-04
Loss = 3.2140e-03, PNorm = 171.2691, GNorm = 0.0929, lr_0 = 2.1810e-04
Loss = 2.5630e-03, PNorm = 171.2739, GNorm = 0.0694, lr_0 = 2.1795e-04
Loss = 2.5212e-03, PNorm = 171.2789, GNorm = 0.1208, lr_0 = 2.1780e-04
Loss = 2.4027e-03, PNorm = 171.2841, GNorm = 0.1926, lr_0 = 2.1765e-04
Loss = 2.2104e-03, PNorm = 171.2902, GNorm = 0.1015, lr_0 = 2.1751e-04
Loss = 3.7054e-03, PNorm = 171.2954, GNorm = 0.1524, lr_0 = 2.1736e-04
Loss = 2.5427e-03, PNorm = 171.3019, GNorm = 0.1991, lr_0 = 2.1721e-04
Loss = 3.4425e-03, PNorm = 171.3077, GNorm = 0.1844, lr_0 = 2.1706e-04
Loss = 2.1827e-03, PNorm = 171.3133, GNorm = 0.1174, lr_0 = 2.1691e-04
Loss = 2.2062e-03, PNorm = 171.3170, GNorm = 0.1106, lr_0 = 2.1676e-04
Loss = 2.6345e-03, PNorm = 171.3228, GNorm = 0.1080, lr_0 = 2.1661e-04
Loss = 2.9320e-03, PNorm = 171.3290, GNorm = 0.0569, lr_0 = 2.1646e-04
Loss = 3.5464e-03, PNorm = 171.3327, GNorm = 0.2514, lr_0 = 2.1632e-04
Loss = 2.1282e-03, PNorm = 171.3377, GNorm = 0.0677, lr_0 = 2.1617e-04
Loss = 2.2858e-03, PNorm = 171.3435, GNorm = 0.0843, lr_0 = 2.1602e-04
Loss = 2.8673e-03, PNorm = 171.3496, GNorm = 0.3452, lr_0 = 2.1587e-04
Loss = 8.0841e-03, PNorm = 171.3563, GNorm = 0.2031, lr_0 = 2.1572e-04
Loss = 3.2519e-03, PNorm = 171.3603, GNorm = 0.2066, lr_0 = 2.1558e-04
Loss = 3.3959e-03, PNorm = 171.3655, GNorm = 0.3458, lr_0 = 2.1543e-04
Loss = 2.4968e-03, PNorm = 171.3740, GNorm = 0.1584, lr_0 = 2.1528e-04
Loss = 3.9230e-03, PNorm = 171.3804, GNorm = 0.1501, lr_0 = 2.1513e-04
Loss = 2.4797e-03, PNorm = 171.3874, GNorm = 0.1863, lr_0 = 2.1499e-04
Loss = 3.5005e-03, PNorm = 171.3951, GNorm = 0.1127, lr_0 = 2.1484e-04
Loss = 3.3169e-03, PNorm = 171.4037, GNorm = 0.0705, lr_0 = 2.1469e-04
Loss = 2.2822e-03, PNorm = 171.4103, GNorm = 0.0647, lr_0 = 2.1454e-04
Loss = 2.2204e-03, PNorm = 171.4153, GNorm = 0.0846, lr_0 = 2.1440e-04
Loss = 2.7839e-03, PNorm = 171.4207, GNorm = 0.1229, lr_0 = 2.1425e-04
Loss = 3.5737e-03, PNorm = 171.4272, GNorm = 0.2608, lr_0 = 2.1410e-04
Loss = 5.6213e-03, PNorm = 171.4336, GNorm = 0.0702, lr_0 = 2.1396e-04
Loss = 2.6301e-03, PNorm = 171.4403, GNorm = 0.0702, lr_0 = 2.1381e-04
Loss = 3.0446e-03, PNorm = 171.4444, GNorm = 0.1375, lr_0 = 2.1366e-04
Loss = 3.0178e-03, PNorm = 171.4474, GNorm = 0.1767, lr_0 = 2.1352e-04
Loss = 4.3795e-03, PNorm = 171.4495, GNorm = 0.1728, lr_0 = 2.1337e-04
Loss = 3.2376e-03, PNorm = 171.4515, GNorm = 0.3934, lr_0 = 2.1323e-04
Loss = 3.4911e-03, PNorm = 171.4566, GNorm = 0.0890, lr_0 = 2.1308e-04
Loss = 2.3469e-03, PNorm = 171.4641, GNorm = 0.2218, lr_0 = 2.1293e-04
Loss = 2.4959e-03, PNorm = 171.4699, GNorm = 0.2424, lr_0 = 2.1279e-04
Loss = 2.9608e-03, PNorm = 171.4769, GNorm = 0.0976, lr_0 = 2.1264e-04
Loss = 3.6066e-03, PNorm = 171.4821, GNorm = 0.2504, lr_0 = 2.1250e-04
Loss = 2.7269e-03, PNorm = 171.4866, GNorm = 0.1621, lr_0 = 2.1235e-04
Loss = 1.9958e-03, PNorm = 171.4910, GNorm = 0.0397, lr_0 = 2.1221e-04
Loss = 2.7431e-03, PNorm = 171.4974, GNorm = 0.1291, lr_0 = 2.1206e-04
Loss = 2.2599e-03, PNorm = 171.5017, GNorm = 0.1699, lr_0 = 2.1191e-04
Loss = 3.0862e-03, PNorm = 171.5061, GNorm = 0.8387, lr_0 = 2.1177e-04
Loss = 2.2326e-03, PNorm = 171.5111, GNorm = 0.1242, lr_0 = 2.1162e-04
Loss = 2.0308e-03, PNorm = 171.5170, GNorm = 0.0685, lr_0 = 2.1148e-04
Loss = 2.2527e-03, PNorm = 171.5214, GNorm = 0.0747, lr_0 = 2.1133e-04
Loss = 3.2454e-03, PNorm = 171.5258, GNorm = 0.4469, lr_0 = 2.1119e-04
Loss = 3.3968e-03, PNorm = 171.5319, GNorm = 0.1513, lr_0 = 2.1104e-04
Loss = 2.6941e-03, PNorm = 171.5358, GNorm = 0.1340, lr_0 = 2.1090e-04
Loss = 7.1681e-03, PNorm = 171.5428, GNorm = 0.1401, lr_0 = 2.1076e-04
Loss = 3.1528e-03, PNorm = 171.5484, GNorm = 0.1932, lr_0 = 2.1061e-04
Loss = 2.6282e-03, PNorm = 171.5544, GNorm = 0.1574, lr_0 = 2.1047e-04
Loss = 2.5760e-03, PNorm = 171.5617, GNorm = 0.2448, lr_0 = 2.1032e-04
Loss = 2.5351e-03, PNorm = 171.5700, GNorm = 0.0918, lr_0 = 2.1018e-04
Loss = 4.3271e-03, PNorm = 171.5776, GNorm = 0.1836, lr_0 = 2.1003e-04
Loss = 2.8583e-03, PNorm = 171.5840, GNorm = 0.0920, lr_0 = 2.0989e-04
Loss = 2.2955e-03, PNorm = 171.5889, GNorm = 0.1829, lr_0 = 2.0975e-04
Loss = 2.9023e-03, PNorm = 171.5944, GNorm = 0.5911, lr_0 = 2.0960e-04
Validation mae = 0.278756
Epoch 21
Loss = 3.2852e-03, PNorm = 171.5973, GNorm = 0.2082, lr_0 = 2.0946e-04
Loss = 2.2181e-03, PNorm = 171.5990, GNorm = 0.1919, lr_0 = 2.0932e-04
Loss = 1.9620e-03, PNorm = 171.5999, GNorm = 0.2283, lr_0 = 2.0917e-04
Loss = 1.7083e-03, PNorm = 171.6030, GNorm = 0.0794, lr_0 = 2.0903e-04
Loss = 3.6881e-03, PNorm = 171.6070, GNorm = 0.3040, lr_0 = 2.0889e-04
Loss = 2.3467e-03, PNorm = 171.6114, GNorm = 0.2482, lr_0 = 2.0874e-04
Loss = 1.9582e-03, PNorm = 171.6152, GNorm = 0.1477, lr_0 = 2.0860e-04
Loss = 2.4694e-03, PNorm = 171.6199, GNorm = 0.0613, lr_0 = 2.0846e-04
Loss = 2.1818e-03, PNorm = 171.6225, GNorm = 0.1346, lr_0 = 2.0831e-04
Loss = 1.8735e-03, PNorm = 171.6258, GNorm = 0.1274, lr_0 = 2.0817e-04
Loss = 2.9190e-03, PNorm = 171.6292, GNorm = 0.1323, lr_0 = 2.0803e-04
Loss = 2.0261e-03, PNorm = 171.6334, GNorm = 0.2114, lr_0 = 2.0789e-04
Loss = 2.4624e-03, PNorm = 171.6362, GNorm = 0.0598, lr_0 = 2.0774e-04
Loss = 2.3318e-03, PNorm = 171.6401, GNorm = 0.0844, lr_0 = 2.0760e-04
Loss = 4.4448e-03, PNorm = 171.6461, GNorm = 0.1837, lr_0 = 2.0746e-04
Loss = 2.6445e-03, PNorm = 171.6505, GNorm = 0.0780, lr_0 = 2.0732e-04
Loss = 2.1477e-03, PNorm = 171.6562, GNorm = 0.0333, lr_0 = 2.0718e-04
Loss = 2.9495e-03, PNorm = 171.6583, GNorm = 0.0797, lr_0 = 2.0703e-04
Loss = 2.0254e-03, PNorm = 171.6596, GNorm = 0.2474, lr_0 = 2.0689e-04
Loss = 2.7583e-03, PNorm = 171.6621, GNorm = 0.2121, lr_0 = 2.0675e-04
Loss = 2.2746e-03, PNorm = 171.6666, GNorm = 0.1772, lr_0 = 2.0661e-04
Loss = 2.0912e-03, PNorm = 171.6702, GNorm = 0.1868, lr_0 = 2.0647e-04
Loss = 2.1615e-03, PNorm = 171.6748, GNorm = 0.0975, lr_0 = 2.0633e-04
Loss = 1.9500e-03, PNorm = 171.6791, GNorm = 0.1300, lr_0 = 2.0618e-04
Loss = 2.6461e-03, PNorm = 171.6840, GNorm = 0.3211, lr_0 = 2.0604e-04
Loss = 3.0028e-03, PNorm = 171.6875, GNorm = 0.0589, lr_0 = 2.0590e-04
Loss = 2.6577e-03, PNorm = 171.6923, GNorm = 0.1197, lr_0 = 2.0576e-04
Loss = 2.1062e-03, PNorm = 171.6958, GNorm = 0.1041, lr_0 = 2.0562e-04
Loss = 2.8596e-03, PNorm = 171.6995, GNorm = 0.1517, lr_0 = 2.0548e-04
Loss = 2.9222e-03, PNorm = 171.7034, GNorm = 0.1073, lr_0 = 2.0534e-04
Loss = 2.0143e-03, PNorm = 171.7067, GNorm = 0.0591, lr_0 = 2.0520e-04
Loss = 2.0391e-03, PNorm = 171.7098, GNorm = 0.1529, lr_0 = 2.0506e-04
Loss = 2.0242e-03, PNorm = 171.7141, GNorm = 0.2072, lr_0 = 2.0492e-04
Loss = 1.7931e-03, PNorm = 171.7208, GNorm = 0.2655, lr_0 = 2.0478e-04
Loss = 2.5556e-03, PNorm = 171.7240, GNorm = 0.1347, lr_0 = 2.0464e-04
Loss = 2.1107e-03, PNorm = 171.7278, GNorm = 0.3245, lr_0 = 2.0450e-04
Loss = 2.9347e-03, PNorm = 171.7312, GNorm = 0.1457, lr_0 = 2.0436e-04
Loss = 2.1653e-03, PNorm = 171.7356, GNorm = 0.1400, lr_0 = 2.0422e-04
Loss = 4.5527e-03, PNorm = 171.7418, GNorm = 0.1752, lr_0 = 2.0408e-04
Loss = 2.3615e-03, PNorm = 171.7452, GNorm = 0.1335, lr_0 = 2.0394e-04
Loss = 2.4146e-03, PNorm = 171.7491, GNorm = 0.1246, lr_0 = 2.0380e-04
Loss = 2.1555e-03, PNorm = 171.7527, GNorm = 0.1254, lr_0 = 2.0366e-04
Loss = 3.0098e-03, PNorm = 171.7564, GNorm = 0.2574, lr_0 = 2.0352e-04
Loss = 3.4844e-03, PNorm = 171.7601, GNorm = 0.2056, lr_0 = 2.0338e-04
Loss = 2.1465e-03, PNorm = 171.7642, GNorm = 0.1356, lr_0 = 2.0324e-04
Loss = 2.5900e-03, PNorm = 171.7687, GNorm = 0.1897, lr_0 = 2.0310e-04
Loss = 3.7871e-03, PNorm = 171.7734, GNorm = 0.0689, lr_0 = 2.0296e-04
Loss = 1.7910e-03, PNorm = 171.7787, GNorm = 0.0629, lr_0 = 2.0282e-04
Loss = 1.9460e-03, PNorm = 171.7821, GNorm = 0.1767, lr_0 = 2.0268e-04
Loss = 4.6351e-03, PNorm = 171.7873, GNorm = 0.4063, lr_0 = 2.0254e-04
Loss = 2.4103e-03, PNorm = 171.7956, GNorm = 0.1038, lr_0 = 2.0240e-04
Loss = 2.5958e-03, PNorm = 171.7985, GNorm = 0.2895, lr_0 = 2.0227e-04
Loss = 4.3981e-03, PNorm = 171.8025, GNorm = 0.1244, lr_0 = 2.0213e-04
Loss = 1.8786e-03, PNorm = 171.8072, GNorm = 0.0614, lr_0 = 2.0199e-04
Loss = 1.8502e-03, PNorm = 171.8123, GNorm = 0.0546, lr_0 = 2.0185e-04
Loss = 2.1924e-03, PNorm = 171.8167, GNorm = 0.2573, lr_0 = 2.0171e-04
Loss = 2.9247e-03, PNorm = 171.8204, GNorm = 0.1898, lr_0 = 2.0157e-04
Loss = 2.1094e-03, PNorm = 171.8244, GNorm = 0.2625, lr_0 = 2.0144e-04
Loss = 2.7012e-03, PNorm = 171.8289, GNorm = 0.1149, lr_0 = 2.0130e-04
Loss = 2.1333e-03, PNorm = 171.8359, GNorm = 0.0899, lr_0 = 2.0116e-04
Loss = 1.9618e-03, PNorm = 171.8419, GNorm = 0.0886, lr_0 = 2.0102e-04
Loss = 1.9199e-03, PNorm = 171.8473, GNorm = 0.1751, lr_0 = 2.0088e-04
Loss = 1.9261e-03, PNorm = 171.8499, GNorm = 0.1170, lr_0 = 2.0075e-04
Loss = 2.5145e-03, PNorm = 171.8518, GNorm = 0.1155, lr_0 = 2.0061e-04
Loss = 4.3214e-03, PNorm = 171.8525, GNorm = 0.2055, lr_0 = 2.0047e-04
Loss = 2.0575e-03, PNorm = 171.8566, GNorm = 0.0735, lr_0 = 2.0033e-04
Loss = 2.1141e-03, PNorm = 171.8607, GNorm = 0.1202, lr_0 = 2.0020e-04
Loss = 2.4315e-03, PNorm = 171.8651, GNorm = 0.1784, lr_0 = 2.0006e-04
Loss = 1.9755e-03, PNorm = 171.8703, GNorm = 0.1256, lr_0 = 1.9992e-04
Loss = 2.4092e-03, PNorm = 171.8741, GNorm = 0.2601, lr_0 = 1.9979e-04
Loss = 3.2223e-03, PNorm = 171.8807, GNorm = 0.0999, lr_0 = 1.9965e-04
Loss = 2.3316e-03, PNorm = 171.8868, GNorm = 0.1137, lr_0 = 1.9951e-04
Loss = 1.8858e-03, PNorm = 171.8914, GNorm = 0.2015, lr_0 = 1.9938e-04
Loss = 2.1811e-03, PNorm = 171.8966, GNorm = 0.3103, lr_0 = 1.9924e-04
Loss = 3.3448e-03, PNorm = 171.9013, GNorm = 0.3022, lr_0 = 1.9910e-04
Loss = 2.2395e-03, PNorm = 171.9076, GNorm = 0.1352, lr_0 = 1.9897e-04
Loss = 2.7389e-03, PNorm = 171.9110, GNorm = 0.2638, lr_0 = 1.9883e-04
Loss = 3.3120e-03, PNorm = 171.9138, GNorm = 0.1864, lr_0 = 1.9869e-04
Loss = 2.3430e-03, PNorm = 171.9177, GNorm = 0.2743, lr_0 = 1.9856e-04
Loss = 2.4376e-03, PNorm = 171.9202, GNorm = 0.1256, lr_0 = 1.9842e-04
Loss = 2.8987e-03, PNorm = 171.9227, GNorm = 0.2017, lr_0 = 1.9829e-04
Loss = 2.8127e-03, PNorm = 171.9264, GNorm = 0.0666, lr_0 = 1.9815e-04
Loss = 3.7876e-03, PNorm = 171.9308, GNorm = 0.1220, lr_0 = 1.9801e-04
Loss = 2.9815e-03, PNorm = 171.9368, GNorm = 0.1991, lr_0 = 1.9788e-04
Loss = 3.7814e-03, PNorm = 171.9413, GNorm = 0.4396, lr_0 = 1.9774e-04
Loss = 3.5591e-03, PNorm = 171.9475, GNorm = 0.1813, lr_0 = 1.9761e-04
Loss = 4.2678e-03, PNorm = 171.9539, GNorm = 0.2139, lr_0 = 1.9747e-04
Loss = 2.3623e-03, PNorm = 171.9600, GNorm = 0.1910, lr_0 = 1.9734e-04
Loss = 5.7808e-03, PNorm = 171.9640, GNorm = 0.1880, lr_0 = 1.9720e-04
Loss = 3.6657e-03, PNorm = 171.9694, GNorm = 0.1095, lr_0 = 1.9707e-04
Loss = 3.8502e-03, PNorm = 171.9746, GNorm = 0.0998, lr_0 = 1.9693e-04
Loss = 5.0533e-03, PNorm = 171.9794, GNorm = 0.2124, lr_0 = 1.9680e-04
Loss = 3.2234e-03, PNorm = 171.9834, GNorm = 0.2855, lr_0 = 1.9666e-04
Loss = 2.0654e-03, PNorm = 171.9873, GNorm = 0.0410, lr_0 = 1.9653e-04
Loss = 5.8641e-03, PNorm = 171.9909, GNorm = 0.2338, lr_0 = 1.9639e-04
Loss = 2.5109e-03, PNorm = 171.9965, GNorm = 0.0833, lr_0 = 1.9626e-04
Loss = 3.1307e-03, PNorm = 172.0018, GNorm = 0.1666, lr_0 = 1.9612e-04
Loss = 1.6805e-03, PNorm = 172.0060, GNorm = 0.2034, lr_0 = 1.9599e-04
Loss = 1.8875e-03, PNorm = 172.0102, GNorm = 0.0543, lr_0 = 1.9585e-04
Loss = 2.0538e-03, PNorm = 172.0165, GNorm = 0.1231, lr_0 = 1.9572e-04
Loss = 2.1877e-03, PNorm = 172.0216, GNorm = 0.2094, lr_0 = 1.9559e-04
Loss = 2.3460e-03, PNorm = 172.0261, GNorm = 0.1607, lr_0 = 1.9545e-04
Loss = 2.5110e-03, PNorm = 172.0307, GNorm = 0.2108, lr_0 = 1.9532e-04
Loss = 2.3265e-03, PNorm = 172.0354, GNorm = 0.2334, lr_0 = 1.9518e-04
Loss = 3.4384e-03, PNorm = 172.0415, GNorm = 0.1759, lr_0 = 1.9505e-04
Loss = 3.3486e-03, PNorm = 172.0488, GNorm = 0.2732, lr_0 = 1.9492e-04
Loss = 4.3160e-03, PNorm = 172.0527, GNorm = 0.1999, lr_0 = 1.9478e-04
Loss = 3.6261e-03, PNorm = 172.0584, GNorm = 0.2007, lr_0 = 1.9465e-04
Loss = 3.7053e-03, PNorm = 172.0644, GNorm = 0.1659, lr_0 = 1.9452e-04
Loss = 1.7330e-03, PNorm = 172.0703, GNorm = 0.1214, lr_0 = 1.9438e-04
Loss = 1.9579e-03, PNorm = 172.0739, GNorm = 0.1400, lr_0 = 1.9425e-04
Loss = 2.4482e-03, PNorm = 172.0780, GNorm = 0.1373, lr_0 = 1.9412e-04
Loss = 2.4543e-03, PNorm = 172.0839, GNorm = 0.1271, lr_0 = 1.9398e-04
Loss = 2.8835e-03, PNorm = 172.0910, GNorm = 0.0738, lr_0 = 1.9385e-04
Loss = 2.1027e-03, PNorm = 172.0953, GNorm = 0.2401, lr_0 = 1.9372e-04
Loss = 3.1083e-03, PNorm = 172.1008, GNorm = 0.2636, lr_0 = 1.9359e-04
Loss = 2.3879e-03, PNorm = 172.1054, GNorm = 0.1642, lr_0 = 1.9345e-04
Loss = 1.9468e-03, PNorm = 172.1101, GNorm = 0.0964, lr_0 = 1.9332e-04
Loss = 1.8090e-03, PNorm = 172.1125, GNorm = 0.0557, lr_0 = 1.9319e-04
Loss = 2.3736e-03, PNorm = 172.1154, GNorm = 0.2368, lr_0 = 1.9306e-04
Validation mae = 0.278737
Epoch 22
Loss = 1.9186e-03, PNorm = 172.1176, GNorm = 0.2171, lr_0 = 1.9292e-04
Loss = 2.4299e-03, PNorm = 172.1195, GNorm = 0.1076, lr_0 = 1.9279e-04
Loss = 2.1287e-03, PNorm = 172.1249, GNorm = 0.3739, lr_0 = 1.9266e-04
Loss = 2.5374e-03, PNorm = 172.1283, GNorm = 0.0504, lr_0 = 1.9253e-04
Loss = 2.6266e-03, PNorm = 172.1308, GNorm = 0.2916, lr_0 = 1.9240e-04
Loss = 2.0942e-03, PNorm = 172.1312, GNorm = 0.0966, lr_0 = 1.9226e-04
Loss = 1.7066e-03, PNorm = 172.1339, GNorm = 0.2893, lr_0 = 1.9213e-04
Loss = 1.7634e-03, PNorm = 172.1374, GNorm = 0.1205, lr_0 = 1.9200e-04
Loss = 3.1449e-03, PNorm = 172.1399, GNorm = 0.1207, lr_0 = 1.9187e-04
Loss = 3.3791e-03, PNorm = 172.1426, GNorm = 0.1756, lr_0 = 1.9174e-04
Loss = 2.4927e-03, PNorm = 172.1471, GNorm = 0.1556, lr_0 = 1.9161e-04
Loss = 3.6649e-03, PNorm = 172.1500, GNorm = 0.1282, lr_0 = 1.9148e-04
Loss = 2.0320e-03, PNorm = 172.1545, GNorm = 0.0953, lr_0 = 1.9134e-04
Loss = 2.1172e-03, PNorm = 172.1567, GNorm = 0.0989, lr_0 = 1.9121e-04
Loss = 1.7347e-03, PNorm = 172.1599, GNorm = 0.0627, lr_0 = 1.9108e-04
Loss = 1.7618e-03, PNorm = 172.1622, GNorm = 0.1315, lr_0 = 1.9095e-04
Loss = 2.7291e-03, PNorm = 172.1669, GNorm = 0.1180, lr_0 = 1.9082e-04
Loss = 2.1221e-03, PNorm = 172.1729, GNorm = 0.0860, lr_0 = 1.9069e-04
Loss = 2.4108e-03, PNorm = 172.1775, GNorm = 0.0485, lr_0 = 1.9056e-04
Loss = 1.9208e-03, PNorm = 172.1806, GNorm = 0.1063, lr_0 = 1.9043e-04
Loss = 3.4422e-03, PNorm = 172.1861, GNorm = 0.0739, lr_0 = 1.9030e-04
Loss = 3.7191e-03, PNorm = 172.1910, GNorm = 0.1679, lr_0 = 1.9017e-04
Loss = 1.6559e-03, PNorm = 172.1944, GNorm = 0.1242, lr_0 = 1.9004e-04
Loss = 1.4438e-03, PNorm = 172.1977, GNorm = 0.1092, lr_0 = 1.8991e-04
Loss = 2.1949e-03, PNorm = 172.2016, GNorm = 0.1155, lr_0 = 1.8978e-04
Loss = 1.9637e-03, PNorm = 172.2051, GNorm = 0.1229, lr_0 = 1.8965e-04
Loss = 1.6109e-03, PNorm = 172.2068, GNorm = 0.1788, lr_0 = 1.8952e-04
Loss = 2.1194e-03, PNorm = 172.2093, GNorm = 0.1139, lr_0 = 1.8939e-04
Loss = 3.0227e-03, PNorm = 172.2144, GNorm = 0.1724, lr_0 = 1.8926e-04
Loss = 2.4581e-03, PNorm = 172.2197, GNorm = 0.1286, lr_0 = 1.8913e-04
Loss = 1.7239e-03, PNorm = 172.2241, GNorm = 0.1543, lr_0 = 1.8900e-04
Loss = 2.5444e-03, PNorm = 172.2280, GNorm = 0.2767, lr_0 = 1.8887e-04
Loss = 2.1162e-03, PNorm = 172.2328, GNorm = 0.0646, lr_0 = 1.8874e-04
Loss = 1.6968e-03, PNorm = 172.2373, GNorm = 0.0613, lr_0 = 1.8861e-04
Loss = 1.5337e-03, PNorm = 172.2403, GNorm = 0.1842, lr_0 = 1.8848e-04
Loss = 2.1450e-03, PNorm = 172.2438, GNorm = 0.1157, lr_0 = 1.8835e-04
Loss = 1.9782e-03, PNorm = 172.2471, GNorm = 0.1911, lr_0 = 1.8822e-04
Loss = 2.0822e-03, PNorm = 172.2503, GNorm = 0.1052, lr_0 = 1.8809e-04
Loss = 2.5685e-03, PNorm = 172.2550, GNorm = 0.2046, lr_0 = 1.8797e-04
Loss = 2.6515e-03, PNorm = 172.2608, GNorm = 0.1814, lr_0 = 1.8784e-04
Loss = 1.8062e-03, PNorm = 172.2643, GNorm = 0.1465, lr_0 = 1.8771e-04
Loss = 1.7532e-03, PNorm = 172.2667, GNorm = 0.2622, lr_0 = 1.8758e-04
Loss = 1.5318e-03, PNorm = 172.2681, GNorm = 0.0590, lr_0 = 1.8745e-04
Loss = 2.1254e-03, PNorm = 172.2703, GNorm = 0.2029, lr_0 = 1.8732e-04
Loss = 1.7570e-03, PNorm = 172.2732, GNorm = 0.1335, lr_0 = 1.8719e-04
Loss = 2.7335e-03, PNorm = 172.2764, GNorm = 0.1428, lr_0 = 1.8707e-04
Loss = 1.9972e-03, PNorm = 172.2808, GNorm = 0.0789, lr_0 = 1.8694e-04
Loss = 3.2368e-03, PNorm = 172.2841, GNorm = 0.0499, lr_0 = 1.8681e-04
Loss = 1.8471e-03, PNorm = 172.2882, GNorm = 0.2721, lr_0 = 1.8668e-04
Loss = 3.6777e-03, PNorm = 172.2898, GNorm = 0.3308, lr_0 = 1.8655e-04
Loss = 2.1676e-03, PNorm = 172.2933, GNorm = 0.1090, lr_0 = 1.8643e-04
Loss = 3.6318e-03, PNorm = 172.2978, GNorm = 0.0780, lr_0 = 1.8630e-04
Loss = 1.7025e-03, PNorm = 172.3026, GNorm = 0.0613, lr_0 = 1.8617e-04
Loss = 3.9855e-03, PNorm = 172.3065, GNorm = 0.1205, lr_0 = 1.8604e-04
Loss = 1.9494e-03, PNorm = 172.3107, GNorm = 0.1498, lr_0 = 1.8592e-04
Loss = 2.3340e-03, PNorm = 172.3147, GNorm = 0.3035, lr_0 = 1.8579e-04
Loss = 1.6653e-03, PNorm = 172.3182, GNorm = 0.1321, lr_0 = 1.8566e-04
Loss = 3.0902e-03, PNorm = 172.3196, GNorm = 0.4353, lr_0 = 1.8553e-04
Loss = 2.0163e-03, PNorm = 172.3240, GNorm = 0.0793, lr_0 = 1.8541e-04
Loss = 2.4335e-03, PNorm = 172.3273, GNorm = 0.6181, lr_0 = 1.8528e-04
Loss = 1.7582e-03, PNorm = 172.3291, GNorm = 0.0957, lr_0 = 1.8515e-04
Loss = 1.6722e-03, PNorm = 172.3307, GNorm = 0.1598, lr_0 = 1.8503e-04
Loss = 1.5690e-03, PNorm = 172.3349, GNorm = 0.1918, lr_0 = 1.8490e-04
Loss = 2.2710e-03, PNorm = 172.3371, GNorm = 0.1419, lr_0 = 1.8477e-04
Loss = 2.5245e-03, PNorm = 172.3403, GNorm = 0.0912, lr_0 = 1.8465e-04
Loss = 4.9264e-03, PNorm = 172.3434, GNorm = 0.4432, lr_0 = 1.8452e-04
Loss = 2.7045e-03, PNorm = 172.3465, GNorm = 0.0446, lr_0 = 1.8439e-04
Loss = 3.5975e-03, PNorm = 172.3507, GNorm = 0.1092, lr_0 = 1.8427e-04
Loss = 1.7357e-03, PNorm = 172.3533, GNorm = 0.1538, lr_0 = 1.8414e-04
Loss = 3.7492e-03, PNorm = 172.3567, GNorm = 0.0737, lr_0 = 1.8401e-04
Loss = 1.7339e-03, PNorm = 172.3604, GNorm = 0.1925, lr_0 = 1.8389e-04
Loss = 2.7317e-03, PNorm = 172.3631, GNorm = 0.1172, lr_0 = 1.8376e-04
Loss = 2.4542e-03, PNorm = 172.3670, GNorm = 0.1913, lr_0 = 1.8364e-04
Loss = 1.9839e-03, PNorm = 172.3727, GNorm = 0.1362, lr_0 = 1.8351e-04
Loss = 1.8669e-03, PNorm = 172.3765, GNorm = 0.0772, lr_0 = 1.8338e-04
Loss = 3.5371e-03, PNorm = 172.3811, GNorm = 0.0850, lr_0 = 1.8326e-04
Loss = 1.5128e-03, PNorm = 172.3858, GNorm = 0.0930, lr_0 = 1.8313e-04
Loss = 4.3761e-03, PNorm = 172.3895, GNorm = 0.1323, lr_0 = 1.8301e-04
Loss = 2.4480e-03, PNorm = 172.3933, GNorm = 0.1866, lr_0 = 1.8288e-04
Loss = 2.2093e-03, PNorm = 172.3977, GNorm = 0.1321, lr_0 = 1.8276e-04
Loss = 1.8287e-03, PNorm = 172.4030, GNorm = 0.1263, lr_0 = 1.8263e-04
Loss = 1.8800e-03, PNorm = 172.4060, GNorm = 0.0645, lr_0 = 1.8251e-04
Loss = 3.1959e-03, PNorm = 172.4069, GNorm = 0.2044, lr_0 = 1.8238e-04
Loss = 1.6412e-03, PNorm = 172.4113, GNorm = 0.1565, lr_0 = 1.8226e-04
Loss = 3.0950e-03, PNorm = 172.4135, GNorm = 0.2184, lr_0 = 1.8213e-04
Loss = 1.6806e-03, PNorm = 172.4170, GNorm = 0.2001, lr_0 = 1.8201e-04
Loss = 1.5086e-03, PNorm = 172.4211, GNorm = 0.1227, lr_0 = 1.8188e-04
Loss = 1.8975e-03, PNorm = 172.4254, GNorm = 0.1701, lr_0 = 1.8176e-04
Loss = 2.0138e-03, PNorm = 172.4313, GNorm = 0.3408, lr_0 = 1.8163e-04
Loss = 1.7507e-03, PNorm = 172.4379, GNorm = 0.1239, lr_0 = 1.8151e-04
Loss = 3.0868e-03, PNorm = 172.4417, GNorm = 0.1085, lr_0 = 1.8138e-04
Loss = 2.3748e-03, PNorm = 172.4442, GNorm = 0.0882, lr_0 = 1.8126e-04
Loss = 2.1396e-03, PNorm = 172.4499, GNorm = 0.0876, lr_0 = 1.8114e-04
Loss = 3.3388e-03, PNorm = 172.4545, GNorm = 0.1079, lr_0 = 1.8101e-04
Loss = 2.7917e-03, PNorm = 172.4591, GNorm = 0.1548, lr_0 = 1.8089e-04
Loss = 1.6814e-03, PNorm = 172.4637, GNorm = 0.0408, lr_0 = 1.8076e-04
Loss = 2.4203e-03, PNorm = 172.4674, GNorm = 0.1937, lr_0 = 1.8064e-04
Loss = 2.3231e-03, PNorm = 172.4698, GNorm = 0.1444, lr_0 = 1.8052e-04
Loss = 2.7119e-03, PNorm = 172.4736, GNorm = 0.1406, lr_0 = 1.8039e-04
Loss = 2.2284e-03, PNorm = 172.4782, GNorm = 0.1886, lr_0 = 1.8027e-04
Loss = 2.6132e-03, PNorm = 172.4858, GNorm = 0.0964, lr_0 = 1.8015e-04
Loss = 1.8578e-03, PNorm = 172.4920, GNorm = 0.2019, lr_0 = 1.8002e-04
Loss = 2.7978e-03, PNorm = 172.4967, GNorm = 0.1173, lr_0 = 1.7990e-04
Loss = 1.7498e-03, PNorm = 172.5004, GNorm = 0.0538, lr_0 = 1.7978e-04
Loss = 1.9830e-03, PNorm = 172.5047, GNorm = 0.0688, lr_0 = 1.7965e-04
Loss = 3.1675e-03, PNorm = 172.5093, GNorm = 0.1143, lr_0 = 1.7953e-04
Loss = 2.6516e-03, PNorm = 172.5131, GNorm = 0.1606, lr_0 = 1.7941e-04
Loss = 3.4673e-03, PNorm = 172.5169, GNorm = 0.1109, lr_0 = 1.7928e-04
Loss = 3.4084e-03, PNorm = 172.5192, GNorm = 0.1683, lr_0 = 1.7916e-04
Loss = 1.9024e-03, PNorm = 172.5222, GNorm = 0.0819, lr_0 = 1.7904e-04
Loss = 3.0035e-03, PNorm = 172.5257, GNorm = 0.1310, lr_0 = 1.7892e-04
Loss = 3.6834e-03, PNorm = 172.5304, GNorm = 0.3602, lr_0 = 1.7879e-04
Loss = 2.5612e-03, PNorm = 172.5344, GNorm = 0.2538, lr_0 = 1.7867e-04
Loss = 1.4598e-03, PNorm = 172.5386, GNorm = 0.1086, lr_0 = 1.7855e-04
Loss = 1.7677e-03, PNorm = 172.5421, GNorm = 0.1081, lr_0 = 1.7843e-04
Loss = 2.3293e-03, PNorm = 172.5458, GNorm = 0.1060, lr_0 = 1.7830e-04
Loss = 2.8569e-03, PNorm = 172.5511, GNorm = 0.1528, lr_0 = 1.7818e-04
Loss = 2.0583e-03, PNorm = 172.5549, GNorm = 0.2186, lr_0 = 1.7806e-04
Loss = 1.4462e-03, PNorm = 172.5588, GNorm = 0.0501, lr_0 = 1.7794e-04
Loss = 5.6069e-03, PNorm = 172.5610, GNorm = 0.2845, lr_0 = 1.7782e-04
Validation mae = 0.278547
Epoch 23
Loss = 1.5727e-03, PNorm = 172.5631, GNorm = 0.0790, lr_0 = 1.7769e-04
Loss = 2.2747e-03, PNorm = 172.5650, GNorm = 0.2636, lr_0 = 1.7757e-04
Loss = 1.7418e-03, PNorm = 172.5683, GNorm = 0.0783, lr_0 = 1.7745e-04
Loss = 2.0010e-03, PNorm = 172.5721, GNorm = 0.0564, lr_0 = 1.7733e-04
Loss = 1.9050e-03, PNorm = 172.5754, GNorm = 0.0462, lr_0 = 1.7721e-04
Loss = 1.6421e-03, PNorm = 172.5782, GNorm = 0.0586, lr_0 = 1.7709e-04
Loss = 1.8347e-03, PNorm = 172.5801, GNorm = 0.1688, lr_0 = 1.7696e-04
Loss = 1.3981e-03, PNorm = 172.5827, GNorm = 0.0749, lr_0 = 1.7684e-04
Loss = 1.4476e-03, PNorm = 172.5851, GNorm = 0.0748, lr_0 = 1.7672e-04
Loss = 3.0401e-03, PNorm = 172.5901, GNorm = 0.2009, lr_0 = 1.7660e-04
Loss = 2.0962e-03, PNorm = 172.5936, GNorm = 0.0941, lr_0 = 1.7648e-04
Loss = 3.6112e-03, PNorm = 172.5977, GNorm = 0.1431, lr_0 = 1.7636e-04
Loss = 2.3856e-03, PNorm = 172.6011, GNorm = 0.1053, lr_0 = 1.7624e-04
Loss = 1.4016e-03, PNorm = 172.6033, GNorm = 0.1257, lr_0 = 1.7612e-04
Loss = 1.6092e-03, PNorm = 172.6063, GNorm = 0.1512, lr_0 = 1.7600e-04
Loss = 1.7175e-03, PNorm = 172.6085, GNorm = 0.0964, lr_0 = 1.7588e-04
Loss = 3.7149e-03, PNorm = 172.6121, GNorm = 0.0692, lr_0 = 1.7576e-04
Loss = 1.5053e-03, PNorm = 172.6148, GNorm = 0.0847, lr_0 = 1.7564e-04
Loss = 1.4331e-03, PNorm = 172.6188, GNorm = 0.0545, lr_0 = 1.7552e-04
Loss = 1.3304e-03, PNorm = 172.6216, GNorm = 0.2123, lr_0 = 1.7540e-04
Loss = 2.9702e-03, PNorm = 172.6223, GNorm = 0.2905, lr_0 = 1.7528e-04
Loss = 1.7218e-03, PNorm = 172.6246, GNorm = 0.2155, lr_0 = 1.7516e-04
Loss = 2.6726e-03, PNorm = 172.6274, GNorm = 0.1562, lr_0 = 1.7504e-04
Loss = 1.3650e-03, PNorm = 172.6309, GNorm = 0.1395, lr_0 = 1.7492e-04
Loss = 2.3105e-03, PNorm = 172.6322, GNorm = 0.1280, lr_0 = 1.7480e-04
Loss = 1.4911e-03, PNorm = 172.6361, GNorm = 0.2227, lr_0 = 1.7468e-04
Loss = 1.5641e-03, PNorm = 172.6391, GNorm = 0.2008, lr_0 = 1.7456e-04
Loss = 1.9706e-03, PNorm = 172.6416, GNorm = 0.0819, lr_0 = 1.7444e-04
Loss = 1.8018e-03, PNorm = 172.6433, GNorm = 0.0646, lr_0 = 1.7432e-04
Loss = 1.9671e-03, PNorm = 172.6461, GNorm = 0.1424, lr_0 = 1.7420e-04
Loss = 2.6510e-03, PNorm = 172.6490, GNorm = 0.0850, lr_0 = 1.7408e-04
Loss = 1.1778e-03, PNorm = 172.6517, GNorm = 0.1144, lr_0 = 1.7396e-04
Loss = 2.5510e-03, PNorm = 172.6564, GNorm = 0.1740, lr_0 = 1.7384e-04
Loss = 1.7644e-03, PNorm = 172.6609, GNorm = 0.1078, lr_0 = 1.7372e-04
Loss = 1.3823e-03, PNorm = 172.6649, GNorm = 0.0404, lr_0 = 1.7360e-04
Loss = 1.6925e-03, PNorm = 172.6675, GNorm = 0.3305, lr_0 = 1.7348e-04
Loss = 2.3730e-03, PNorm = 172.6704, GNorm = 0.0724, lr_0 = 1.7336e-04
Loss = 2.7761e-03, PNorm = 172.6717, GNorm = 0.1441, lr_0 = 1.7325e-04
Loss = 1.5525e-03, PNorm = 172.6750, GNorm = 0.0823, lr_0 = 1.7313e-04
Loss = 2.5780e-03, PNorm = 172.6791, GNorm = 0.2802, lr_0 = 1.7301e-04
Loss = 2.4968e-03, PNorm = 172.6830, GNorm = 0.1308, lr_0 = 1.7289e-04
Loss = 1.2171e-03, PNorm = 172.6873, GNorm = 0.1081, lr_0 = 1.7277e-04
Loss = 2.4615e-03, PNorm = 172.6899, GNorm = 0.2329, lr_0 = 1.7265e-04
Loss = 1.2595e-03, PNorm = 172.6919, GNorm = 0.1171, lr_0 = 1.7253e-04
Loss = 2.0970e-03, PNorm = 172.6946, GNorm = 0.1816, lr_0 = 1.7242e-04
Loss = 3.1069e-03, PNorm = 172.6977, GNorm = 0.0581, lr_0 = 1.7230e-04
Loss = 2.0621e-03, PNorm = 172.6993, GNorm = 0.1060, lr_0 = 1.7218e-04
Loss = 2.2965e-03, PNorm = 172.7001, GNorm = 0.2125, lr_0 = 1.7206e-04
Loss = 1.3138e-03, PNorm = 172.7031, GNorm = 0.1159, lr_0 = 1.7194e-04
Loss = 2.3262e-03, PNorm = 172.7060, GNorm = 0.2748, lr_0 = 1.7183e-04
Loss = 1.7739e-03, PNorm = 172.7091, GNorm = 0.1040, lr_0 = 1.7171e-04
Loss = 2.8320e-03, PNorm = 172.7120, GNorm = 0.0939, lr_0 = 1.7159e-04
Loss = 2.5767e-03, PNorm = 172.7141, GNorm = 0.0983, lr_0 = 1.7147e-04
Loss = 1.3475e-03, PNorm = 172.7171, GNorm = 0.0990, lr_0 = 1.7136e-04
Loss = 3.6035e-03, PNorm = 172.7215, GNorm = 0.0864, lr_0 = 1.7124e-04
Loss = 2.1465e-03, PNorm = 172.7252, GNorm = 0.3084, lr_0 = 1.7112e-04
Loss = 1.5124e-03, PNorm = 172.7282, GNorm = 0.2482, lr_0 = 1.7100e-04
Loss = 1.4791e-03, PNorm = 172.7307, GNorm = 0.0659, lr_0 = 1.7089e-04
Loss = 1.4936e-03, PNorm = 172.7329, GNorm = 0.1471, lr_0 = 1.7077e-04
Loss = 3.8757e-03, PNorm = 172.7358, GNorm = 0.0679, lr_0 = 1.7065e-04
Loss = 1.8394e-03, PNorm = 172.7399, GNorm = 0.0980, lr_0 = 1.7054e-04
Loss = 1.3805e-03, PNorm = 172.7442, GNorm = 0.1290, lr_0 = 1.7042e-04
Loss = 2.5915e-03, PNorm = 172.7487, GNorm = 0.1123, lr_0 = 1.7030e-04
Loss = 1.6449e-03, PNorm = 172.7524, GNorm = 0.2624, lr_0 = 1.7019e-04
Loss = 1.3620e-03, PNorm = 172.7556, GNorm = 0.1691, lr_0 = 1.7007e-04
Loss = 3.1767e-03, PNorm = 172.7570, GNorm = 0.0534, lr_0 = 1.6995e-04
Loss = 2.0720e-03, PNorm = 172.7595, GNorm = 0.1941, lr_0 = 1.6984e-04
Loss = 1.5681e-03, PNorm = 172.7608, GNorm = 0.0574, lr_0 = 1.6972e-04
Loss = 2.1613e-03, PNorm = 172.7625, GNorm = 0.2580, lr_0 = 1.6960e-04
Loss = 1.5239e-03, PNorm = 172.7660, GNorm = 0.0801, lr_0 = 1.6949e-04
Loss = 1.4482e-03, PNorm = 172.7704, GNorm = 0.1743, lr_0 = 1.6937e-04
Loss = 3.3857e-03, PNorm = 172.7730, GNorm = 0.0389, lr_0 = 1.6926e-04
Loss = 1.7370e-03, PNorm = 172.7759, GNorm = 0.0761, lr_0 = 1.6914e-04
Loss = 1.9248e-03, PNorm = 172.7788, GNorm = 0.3143, lr_0 = 1.6902e-04
Loss = 3.0292e-03, PNorm = 172.7827, GNorm = 0.1024, lr_0 = 1.6891e-04
Loss = 1.9112e-03, PNorm = 172.7871, GNorm = 0.2039, lr_0 = 1.6879e-04
Loss = 2.9078e-03, PNorm = 172.7902, GNorm = 0.2054, lr_0 = 1.6868e-04
Loss = 1.7745e-03, PNorm = 172.7961, GNorm = 0.1524, lr_0 = 1.6856e-04
Loss = 2.5853e-03, PNorm = 172.7991, GNorm = 0.1412, lr_0 = 1.6845e-04
Loss = 3.1311e-03, PNorm = 172.8021, GNorm = 0.1846, lr_0 = 1.6833e-04
Loss = 1.9028e-03, PNorm = 172.8064, GNorm = 0.1094, lr_0 = 1.6821e-04
Loss = 2.8639e-03, PNorm = 172.8100, GNorm = 0.3508, lr_0 = 1.6810e-04
Loss = 1.7468e-03, PNorm = 172.8146, GNorm = 0.1924, lr_0 = 1.6798e-04
Loss = 1.2467e-03, PNorm = 172.8191, GNorm = 0.1068, lr_0 = 1.6787e-04
Loss = 2.1870e-03, PNorm = 172.8226, GNorm = 0.2199, lr_0 = 1.6775e-04
Loss = 1.5424e-03, PNorm = 172.8255, GNorm = 0.0583, lr_0 = 1.6764e-04
Loss = 4.0456e-03, PNorm = 172.8275, GNorm = 0.4320, lr_0 = 1.6752e-04
Loss = 2.8073e-03, PNorm = 172.8279, GNorm = 0.0985, lr_0 = 1.6741e-04
Loss = 1.5459e-03, PNorm = 172.8309, GNorm = 0.1353, lr_0 = 1.6729e-04
Loss = 1.4393e-03, PNorm = 172.8328, GNorm = 0.2433, lr_0 = 1.6718e-04
Loss = 1.5851e-03, PNorm = 172.8351, GNorm = 0.0637, lr_0 = 1.6707e-04
Loss = 1.7758e-03, PNorm = 172.8369, GNorm = 0.1825, lr_0 = 1.6695e-04
Loss = 2.4508e-03, PNorm = 172.8390, GNorm = 0.0779, lr_0 = 1.6684e-04
Loss = 1.4301e-03, PNorm = 172.8409, GNorm = 0.1865, lr_0 = 1.6672e-04
Loss = 2.5789e-03, PNorm = 172.8451, GNorm = 0.0687, lr_0 = 1.6661e-04
Loss = 3.5969e-03, PNorm = 172.8478, GNorm = 0.0650, lr_0 = 1.6649e-04
Loss = 2.7451e-03, PNorm = 172.8499, GNorm = 0.1099, lr_0 = 1.6638e-04
Loss = 1.5608e-03, PNorm = 172.8535, GNorm = 0.0668, lr_0 = 1.6627e-04
Loss = 4.2400e-03, PNorm = 172.8565, GNorm = 0.3336, lr_0 = 1.6615e-04
Loss = 3.9963e-03, PNorm = 172.8601, GNorm = 0.3021, lr_0 = 1.6604e-04
Loss = 1.5326e-03, PNorm = 172.8624, GNorm = 0.1569, lr_0 = 1.6592e-04
Loss = 2.9213e-03, PNorm = 172.8666, GNorm = 0.0902, lr_0 = 1.6581e-04
Loss = 1.5833e-03, PNorm = 172.8717, GNorm = 0.0787, lr_0 = 1.6570e-04
Loss = 2.2559e-03, PNorm = 172.8750, GNorm = 0.0999, lr_0 = 1.6558e-04
Loss = 1.6866e-03, PNorm = 172.8795, GNorm = 0.0577, lr_0 = 1.6547e-04
Loss = 2.5620e-03, PNorm = 172.8815, GNorm = 0.1089, lr_0 = 1.6536e-04
Loss = 2.2593e-03, PNorm = 172.8826, GNorm = 0.1096, lr_0 = 1.6524e-04
Loss = 1.9907e-03, PNorm = 172.8858, GNorm = 0.1659, lr_0 = 1.6513e-04
Loss = 1.8271e-03, PNorm = 172.8893, GNorm = 0.1228, lr_0 = 1.6502e-04
Loss = 2.5160e-03, PNorm = 172.8926, GNorm = 0.1185, lr_0 = 1.6490e-04
Loss = 1.5810e-03, PNorm = 172.8968, GNorm = 0.0692, lr_0 = 1.6479e-04
Loss = 2.1406e-03, PNorm = 172.9009, GNorm = 0.2852, lr_0 = 1.6468e-04
Loss = 1.6775e-03, PNorm = 172.9033, GNorm = 0.3118, lr_0 = 1.6457e-04
Loss = 1.6951e-03, PNorm = 172.9066, GNorm = 0.1567, lr_0 = 1.6445e-04
Loss = 2.8499e-03, PNorm = 172.9087, GNorm = 0.0610, lr_0 = 1.6434e-04
Loss = 2.5051e-03, PNorm = 172.9129, GNorm = 0.2000, lr_0 = 1.6423e-04
Loss = 4.7205e-03, PNorm = 172.9160, GNorm = 0.0439, lr_0 = 1.6412e-04
Loss = 2.0753e-03, PNorm = 172.9190, GNorm = 0.1353, lr_0 = 1.6400e-04
Loss = 1.6741e-03, PNorm = 172.9218, GNorm = 0.1228, lr_0 = 1.6389e-04
Loss = 1.4747e-03, PNorm = 172.9260, GNorm = 0.1516, lr_0 = 1.6378e-04
Validation mae = 0.278353
Epoch 24
Loss = 3.6163e-03, PNorm = 172.9301, GNorm = 0.4954, lr_0 = 1.6367e-04
Loss = 1.3892e-03, PNorm = 172.9323, GNorm = 0.2509, lr_0 = 1.6355e-04
Loss = 1.5011e-03, PNorm = 172.9335, GNorm = 0.1500, lr_0 = 1.6344e-04
Loss = 1.4539e-03, PNorm = 172.9355, GNorm = 0.2011, lr_0 = 1.6333e-04
Loss = 1.2761e-03, PNorm = 172.9365, GNorm = 0.2011, lr_0 = 1.6322e-04
Loss = 2.8735e-03, PNorm = 172.9394, GNorm = 0.1573, lr_0 = 1.6311e-04
Loss = 1.7925e-03, PNorm = 172.9411, GNorm = 0.0953, lr_0 = 1.6299e-04
Loss = 3.7305e-03, PNorm = 172.9446, GNorm = 0.0646, lr_0 = 1.6288e-04
Loss = 1.2101e-03, PNorm = 172.9466, GNorm = 0.0754, lr_0 = 1.6277e-04
Loss = 1.1492e-03, PNorm = 172.9494, GNorm = 0.0939, lr_0 = 1.6266e-04
Loss = 1.8133e-03, PNorm = 172.9510, GNorm = 0.0549, lr_0 = 1.6255e-04
Loss = 1.4767e-03, PNorm = 172.9525, GNorm = 0.0321, lr_0 = 1.6244e-04
Loss = 1.3254e-03, PNorm = 172.9560, GNorm = 0.0403, lr_0 = 1.6233e-04
Loss = 1.4438e-03, PNorm = 172.9611, GNorm = 0.1473, lr_0 = 1.6221e-04
Loss = 2.1256e-03, PNorm = 172.9642, GNorm = 0.1256, lr_0 = 1.6210e-04
Loss = 2.0086e-03, PNorm = 172.9656, GNorm = 0.2284, lr_0 = 1.6199e-04
Loss = 2.5871e-03, PNorm = 172.9690, GNorm = 0.1776, lr_0 = 1.6188e-04
Loss = 2.8991e-03, PNorm = 172.9725, GNorm = 0.0719, lr_0 = 1.6177e-04
Loss = 1.6844e-03, PNorm = 172.9736, GNorm = 0.1941, lr_0 = 1.6166e-04
Loss = 2.5848e-03, PNorm = 172.9783, GNorm = 0.2086, lr_0 = 1.6155e-04
Loss = 3.7432e-03, PNorm = 172.9808, GNorm = 0.0769, lr_0 = 1.6144e-04
Loss = 1.8234e-03, PNorm = 172.9854, GNorm = 0.1812, lr_0 = 1.6133e-04
Loss = 2.9169e-03, PNorm = 172.9892, GNorm = 0.0520, lr_0 = 1.6122e-04
Loss = 1.9837e-03, PNorm = 172.9912, GNorm = 0.1244, lr_0 = 1.6111e-04
Loss = 1.5494e-03, PNorm = 172.9929, GNorm = 0.0690, lr_0 = 1.6100e-04
Loss = 1.6427e-03, PNorm = 172.9939, GNorm = 0.1419, lr_0 = 1.6089e-04
Loss = 2.3708e-03, PNorm = 172.9980, GNorm = 0.1602, lr_0 = 1.6078e-04
Loss = 2.0616e-03, PNorm = 172.9998, GNorm = 0.3618, lr_0 = 1.6067e-04
Loss = 1.2078e-03, PNorm = 173.0031, GNorm = 0.0900, lr_0 = 1.6056e-04
Loss = 2.0585e-03, PNorm = 173.0045, GNorm = 0.0840, lr_0 = 1.6045e-04
Loss = 1.2655e-03, PNorm = 173.0069, GNorm = 0.0871, lr_0 = 1.6034e-04
Loss = 1.8479e-03, PNorm = 173.0100, GNorm = 0.0287, lr_0 = 1.6023e-04
Loss = 1.7195e-03, PNorm = 173.0122, GNorm = 0.2117, lr_0 = 1.6012e-04
Loss = 1.7296e-03, PNorm = 173.0149, GNorm = 0.0981, lr_0 = 1.6001e-04
Loss = 1.5080e-03, PNorm = 173.0160, GNorm = 0.2033, lr_0 = 1.5990e-04
Loss = 1.8894e-03, PNorm = 173.0166, GNorm = 0.3553, lr_0 = 1.5979e-04
Loss = 1.2565e-03, PNorm = 173.0171, GNorm = 0.1798, lr_0 = 1.5968e-04
Loss = 3.4944e-03, PNorm = 173.0186, GNorm = 0.3433, lr_0 = 1.5957e-04
Loss = 2.4511e-03, PNorm = 173.0223, GNorm = 0.1404, lr_0 = 1.5946e-04
Loss = 3.7751e-03, PNorm = 173.0259, GNorm = 0.2468, lr_0 = 1.5935e-04
Loss = 1.8631e-03, PNorm = 173.0301, GNorm = 0.0877, lr_0 = 1.5924e-04
Loss = 1.6134e-03, PNorm = 173.0323, GNorm = 0.1631, lr_0 = 1.5913e-04
Loss = 1.8297e-03, PNorm = 173.0348, GNorm = 0.5159, lr_0 = 1.5902e-04
Loss = 1.4820e-03, PNorm = 173.0394, GNorm = 0.1004, lr_0 = 1.5891e-04
Loss = 2.3376e-03, PNorm = 173.0430, GNorm = 0.0597, lr_0 = 1.5880e-04
Loss = 2.6227e-03, PNorm = 173.0466, GNorm = 0.1221, lr_0 = 1.5870e-04
Loss = 1.3334e-03, PNorm = 173.0479, GNorm = 0.0508, lr_0 = 1.5859e-04
Loss = 1.8657e-03, PNorm = 173.0488, GNorm = 0.1175, lr_0 = 1.5848e-04
Loss = 1.9025e-03, PNorm = 173.0498, GNorm = 0.1623, lr_0 = 1.5837e-04
Loss = 1.8806e-03, PNorm = 173.0531, GNorm = 0.0654, lr_0 = 1.5826e-04
Loss = 2.0057e-03, PNorm = 173.0558, GNorm = 0.1212, lr_0 = 1.5815e-04
Loss = 1.2683e-03, PNorm = 173.0591, GNorm = 0.1927, lr_0 = 1.5804e-04
Loss = 2.6646e-03, PNorm = 173.0625, GNorm = 0.0936, lr_0 = 1.5794e-04
Loss = 1.2722e-03, PNorm = 173.0648, GNorm = 0.1757, lr_0 = 1.5783e-04
Loss = 1.3137e-03, PNorm = 173.0685, GNorm = 0.0513, lr_0 = 1.5772e-04
Loss = 1.1688e-03, PNorm = 173.0721, GNorm = 0.1196, lr_0 = 1.5761e-04
Loss = 1.6147e-03, PNorm = 173.0743, GNorm = 0.0564, lr_0 = 1.5750e-04
Loss = 2.6116e-03, PNorm = 173.0780, GNorm = 0.1262, lr_0 = 1.5740e-04
Loss = 1.6326e-03, PNorm = 173.0800, GNorm = 0.2039, lr_0 = 1.5729e-04
Loss = 3.8485e-03, PNorm = 173.0814, GNorm = 0.4623, lr_0 = 1.5718e-04
Loss = 2.8596e-03, PNorm = 173.0843, GNorm = 0.1941, lr_0 = 1.5707e-04
Loss = 1.4370e-03, PNorm = 173.0869, GNorm = 0.1850, lr_0 = 1.5697e-04
Loss = 1.6805e-03, PNorm = 173.0900, GNorm = 0.2392, lr_0 = 1.5686e-04
Loss = 2.2466e-03, PNorm = 173.0946, GNorm = 0.2085, lr_0 = 1.5675e-04
Loss = 1.9685e-03, PNorm = 173.0970, GNorm = 0.5637, lr_0 = 1.5664e-04
Loss = 1.9382e-03, PNorm = 173.1023, GNorm = 0.3108, lr_0 = 1.5654e-04
Loss = 2.4254e-03, PNorm = 173.1055, GNorm = 0.1444, lr_0 = 1.5643e-04
Loss = 1.4362e-03, PNorm = 173.1070, GNorm = 0.3278, lr_0 = 1.5632e-04
Loss = 1.9112e-03, PNorm = 173.1075, GNorm = 0.2197, lr_0 = 1.5621e-04
Loss = 1.1135e-03, PNorm = 173.1101, GNorm = 0.0889, lr_0 = 1.5611e-04
Loss = 2.8153e-03, PNorm = 173.1130, GNorm = 0.0433, lr_0 = 1.5600e-04
Loss = 1.6450e-03, PNorm = 173.1153, GNorm = 0.0407, lr_0 = 1.5589e-04
Loss = 1.0399e-03, PNorm = 173.1172, GNorm = 0.0685, lr_0 = 1.5579e-04
Loss = 1.5140e-03, PNorm = 173.1186, GNorm = 0.0979, lr_0 = 1.5568e-04
Loss = 1.1429e-03, PNorm = 173.1233, GNorm = 0.0918, lr_0 = 1.5557e-04
Loss = 2.1534e-03, PNorm = 173.1269, GNorm = 0.1592, lr_0 = 1.5547e-04
Loss = 2.2934e-03, PNorm = 173.1305, GNorm = 0.0439, lr_0 = 1.5536e-04
Loss = 1.6508e-03, PNorm = 173.1328, GNorm = 0.0720, lr_0 = 1.5525e-04
Loss = 1.3210e-03, PNorm = 173.1339, GNorm = 0.0544, lr_0 = 1.5515e-04
Loss = 2.0226e-03, PNorm = 173.1351, GNorm = 0.1873, lr_0 = 1.5504e-04
Loss = 1.6269e-03, PNorm = 173.1395, GNorm = 0.1822, lr_0 = 1.5493e-04
Loss = 1.5565e-03, PNorm = 173.1439, GNorm = 0.3617, lr_0 = 1.5483e-04
Loss = 1.3449e-03, PNorm = 173.1467, GNorm = 0.0533, lr_0 = 1.5472e-04
Loss = 1.2115e-03, PNorm = 173.1477, GNorm = 0.2143, lr_0 = 1.5462e-04
Loss = 1.1590e-03, PNorm = 173.1505, GNorm = 0.1037, lr_0 = 1.5451e-04
Loss = 1.4982e-03, PNorm = 173.1526, GNorm = 0.1103, lr_0 = 1.5440e-04
Loss = 1.4781e-03, PNorm = 173.1560, GNorm = 0.0914, lr_0 = 1.5430e-04
Loss = 1.1259e-03, PNorm = 173.1578, GNorm = 0.0821, lr_0 = 1.5419e-04
Loss = 1.7218e-03, PNorm = 173.1608, GNorm = 0.1138, lr_0 = 1.5409e-04
Loss = 1.8124e-03, PNorm = 173.1633, GNorm = 0.2039, lr_0 = 1.5398e-04
Loss = 1.8579e-03, PNorm = 173.1647, GNorm = 0.0457, lr_0 = 1.5388e-04
Loss = 2.3785e-03, PNorm = 173.1677, GNorm = 0.1936, lr_0 = 1.5377e-04
Loss = 1.1168e-03, PNorm = 173.1702, GNorm = 0.0855, lr_0 = 1.5367e-04
Loss = 2.0040e-03, PNorm = 173.1736, GNorm = 0.1296, lr_0 = 1.5356e-04
Loss = 1.2271e-03, PNorm = 173.1756, GNorm = 0.1024, lr_0 = 1.5346e-04
Loss = 2.0013e-03, PNorm = 173.1777, GNorm = 0.1196, lr_0 = 1.5335e-04
Loss = 1.4770e-03, PNorm = 173.1800, GNorm = 0.1489, lr_0 = 1.5325e-04
Loss = 2.2813e-03, PNorm = 173.1823, GNorm = 0.2670, lr_0 = 1.5314e-04
Loss = 1.2922e-03, PNorm = 173.1846, GNorm = 0.1172, lr_0 = 1.5304e-04
Loss = 1.3984e-03, PNorm = 173.1850, GNorm = 0.0832, lr_0 = 1.5293e-04
Loss = 2.6755e-03, PNorm = 173.1865, GNorm = 0.0663, lr_0 = 1.5283e-04
Loss = 2.0785e-03, PNorm = 173.1896, GNorm = 0.1986, lr_0 = 1.5272e-04
Loss = 1.1143e-03, PNorm = 173.1939, GNorm = 0.2091, lr_0 = 1.5262e-04
Loss = 1.2378e-03, PNorm = 173.1972, GNorm = 0.1692, lr_0 = 1.5251e-04
Loss = 3.0958e-03, PNorm = 173.2025, GNorm = 0.0433, lr_0 = 1.5241e-04
Loss = 1.5674e-03, PNorm = 173.2061, GNorm = 0.0997, lr_0 = 1.5230e-04
Loss = 3.1757e-03, PNorm = 173.2085, GNorm = 0.0549, lr_0 = 1.5220e-04
Loss = 2.5951e-03, PNorm = 173.2111, GNorm = 0.2160, lr_0 = 1.5209e-04
Loss = 2.6999e-03, PNorm = 173.2139, GNorm = 0.1703, lr_0 = 1.5199e-04
Loss = 1.1234e-03, PNorm = 173.2187, GNorm = 0.0543, lr_0 = 1.5189e-04
Loss = 1.9305e-03, PNorm = 173.2202, GNorm = 0.2806, lr_0 = 1.5178e-04
Loss = 2.1055e-03, PNorm = 173.2221, GNorm = 0.0839, lr_0 = 1.5168e-04
Loss = 1.5849e-03, PNorm = 173.2234, GNorm = 0.1865, lr_0 = 1.5157e-04
Loss = 2.2510e-03, PNorm = 173.2267, GNorm = 0.0827, lr_0 = 1.5147e-04
Loss = 1.1066e-03, PNorm = 173.2299, GNorm = 0.0531, lr_0 = 1.5137e-04
Loss = 1.5819e-03, PNorm = 173.2317, GNorm = 0.2458, lr_0 = 1.5126e-04
Loss = 1.4502e-03, PNorm = 173.2327, GNorm = 0.1525, lr_0 = 1.5116e-04
Loss = 3.7932e-03, PNorm = 173.2345, GNorm = 0.0887, lr_0 = 1.5106e-04
Loss = 2.7010e-03, PNorm = 173.2356, GNorm = 0.1350, lr_0 = 1.5095e-04
Loss = 1.8012e-03, PNorm = 173.2394, GNorm = 0.2056, lr_0 = 1.5085e-04
Validation mae = 0.278146
Epoch 25
Loss = 1.6997e-03, PNorm = 173.2435, GNorm = 0.1399, lr_0 = 1.5075e-04
Loss = 2.9478e-03, PNorm = 173.2447, GNorm = 0.1278, lr_0 = 1.5064e-04
Loss = 1.2825e-03, PNorm = 173.2460, GNorm = 0.0373, lr_0 = 1.5054e-04
Loss = 1.2775e-03, PNorm = 173.2482, GNorm = 0.1555, lr_0 = 1.5044e-04
Loss = 8.5450e-04, PNorm = 173.2507, GNorm = 0.1509, lr_0 = 1.5033e-04
Loss = 1.7186e-03, PNorm = 173.2535, GNorm = 0.0858, lr_0 = 1.5023e-04
Loss = 1.7863e-03, PNorm = 173.2569, GNorm = 0.1158, lr_0 = 1.5013e-04
Loss = 7.8302e-04, PNorm = 173.2578, GNorm = 0.0644, lr_0 = 1.5002e-04
Loss = 1.1646e-03, PNorm = 173.2604, GNorm = 0.1264, lr_0 = 1.4992e-04
Loss = 8.7128e-04, PNorm = 173.2617, GNorm = 0.0273, lr_0 = 1.4982e-04
Loss = 1.4228e-03, PNorm = 173.2630, GNorm = 0.1699, lr_0 = 1.4972e-04
Loss = 1.1511e-03, PNorm = 173.2656, GNorm = 0.1148, lr_0 = 1.4961e-04
Loss = 2.0940e-03, PNorm = 173.2682, GNorm = 0.1255, lr_0 = 1.4951e-04
Loss = 2.8095e-03, PNorm = 173.2698, GNorm = 0.1714, lr_0 = 1.4941e-04
Loss = 1.4577e-03, PNorm = 173.2719, GNorm = 0.1414, lr_0 = 1.4931e-04
Loss = 1.5443e-03, PNorm = 173.2745, GNorm = 0.0779, lr_0 = 1.4920e-04
Loss = 1.3154e-03, PNorm = 173.2767, GNorm = 0.0693, lr_0 = 1.4910e-04
Loss = 1.0777e-03, PNorm = 173.2772, GNorm = 0.1475, lr_0 = 1.4900e-04
Loss = 1.1695e-03, PNorm = 173.2771, GNorm = 0.0657, lr_0 = 1.4890e-04
Loss = 1.2759e-03, PNorm = 173.2789, GNorm = 0.1243, lr_0 = 1.4880e-04
Loss = 3.1161e-03, PNorm = 173.2808, GNorm = 0.2064, lr_0 = 1.4869e-04
Loss = 3.6078e-03, PNorm = 173.2829, GNorm = 0.2588, lr_0 = 1.4859e-04
Loss = 3.9640e-03, PNorm = 173.2849, GNorm = 0.1389, lr_0 = 1.4849e-04
Loss = 1.0292e-03, PNorm = 173.2882, GNorm = 0.0810, lr_0 = 1.4839e-04
Loss = 1.3078e-03, PNorm = 173.2904, GNorm = 0.1950, lr_0 = 1.4829e-04
Loss = 9.6310e-04, PNorm = 173.2931, GNorm = 0.1040, lr_0 = 1.4818e-04
Loss = 1.2606e-03, PNorm = 173.2950, GNorm = 0.0437, lr_0 = 1.4808e-04
Loss = 1.4926e-03, PNorm = 173.2963, GNorm = 0.1349, lr_0 = 1.4798e-04
Loss = 1.0969e-03, PNorm = 173.2980, GNorm = 0.0494, lr_0 = 1.4788e-04
Loss = 9.7177e-04, PNorm = 173.3007, GNorm = 0.0400, lr_0 = 1.4778e-04
Loss = 1.1072e-03, PNorm = 173.3019, GNorm = 0.1151, lr_0 = 1.4768e-04
Loss = 9.3907e-04, PNorm = 173.3032, GNorm = 0.0969, lr_0 = 1.4758e-04
Loss = 2.8388e-03, PNorm = 173.3047, GNorm = 0.1980, lr_0 = 1.4748e-04
Loss = 1.0892e-03, PNorm = 173.3052, GNorm = 0.0872, lr_0 = 1.4737e-04
Loss = 1.6965e-03, PNorm = 173.3086, GNorm = 0.1165, lr_0 = 1.4727e-04
Loss = 9.5809e-04, PNorm = 173.3096, GNorm = 0.0403, lr_0 = 1.4717e-04
Loss = 1.0706e-03, PNorm = 173.3113, GNorm = 0.1561, lr_0 = 1.4707e-04
Loss = 9.4203e-04, PNorm = 173.3116, GNorm = 0.0329, lr_0 = 1.4697e-04
Loss = 1.7960e-03, PNorm = 173.3133, GNorm = 0.1675, lr_0 = 1.4687e-04
Loss = 1.4745e-03, PNorm = 173.3160, GNorm = 0.1169, lr_0 = 1.4677e-04
Loss = 1.4327e-03, PNorm = 173.3185, GNorm = 0.0603, lr_0 = 1.4667e-04
Loss = 1.9525e-03, PNorm = 173.3205, GNorm = 0.0850, lr_0 = 1.4657e-04
Loss = 1.1730e-03, PNorm = 173.3231, GNorm = 0.2097, lr_0 = 1.4647e-04
Loss = 1.9635e-03, PNorm = 173.3250, GNorm = 0.0870, lr_0 = 1.4637e-04
Loss = 1.9797e-03, PNorm = 173.3269, GNorm = 0.0903, lr_0 = 1.4627e-04
Loss = 1.9723e-03, PNorm = 173.3296, GNorm = 0.0965, lr_0 = 1.4617e-04
Loss = 1.1174e-03, PNorm = 173.3325, GNorm = 0.1819, lr_0 = 1.4607e-04
Loss = 2.1762e-03, PNorm = 173.3344, GNorm = 0.0469, lr_0 = 1.4597e-04
Loss = 1.3675e-03, PNorm = 173.3373, GNorm = 0.2900, lr_0 = 1.4587e-04
Loss = 1.2754e-03, PNorm = 173.3395, GNorm = 0.1202, lr_0 = 1.4577e-04
Loss = 1.1313e-03, PNorm = 173.3411, GNorm = 0.0484, lr_0 = 1.4567e-04
Loss = 2.0843e-03, PNorm = 173.3441, GNorm = 0.0565, lr_0 = 1.4557e-04
Loss = 1.0701e-03, PNorm = 173.3463, GNorm = 0.1525, lr_0 = 1.4547e-04
Loss = 1.0112e-03, PNorm = 173.3482, GNorm = 0.1340, lr_0 = 1.4537e-04
Loss = 2.1808e-03, PNorm = 173.3503, GNorm = 0.2767, lr_0 = 1.4527e-04
Loss = 1.0276e-03, PNorm = 173.3539, GNorm = 0.0964, lr_0 = 1.4517e-04
Loss = 1.5276e-03, PNorm = 173.3577, GNorm = 0.0727, lr_0 = 1.4507e-04
Loss = 1.8391e-03, PNorm = 173.3600, GNorm = 0.0729, lr_0 = 1.4497e-04
Loss = 1.2390e-03, PNorm = 173.3609, GNorm = 0.0633, lr_0 = 1.4487e-04
Loss = 2.0481e-03, PNorm = 173.3618, GNorm = 0.1576, lr_0 = 1.4477e-04
Loss = 3.3774e-03, PNorm = 173.3628, GNorm = 0.0590, lr_0 = 1.4467e-04
Loss = 1.6921e-03, PNorm = 173.3663, GNorm = 0.0775, lr_0 = 1.4457e-04
Loss = 1.8541e-03, PNorm = 173.3684, GNorm = 0.0598, lr_0 = 1.4447e-04
Loss = 9.0288e-04, PNorm = 173.3706, GNorm = 0.1375, lr_0 = 1.4438e-04
Loss = 2.2432e-03, PNorm = 173.3719, GNorm = 0.1457, lr_0 = 1.4428e-04
Loss = 2.3211e-03, PNorm = 173.3742, GNorm = 0.1228, lr_0 = 1.4418e-04
Loss = 2.7969e-03, PNorm = 173.3769, GNorm = 0.1462, lr_0 = 1.4408e-04
Loss = 1.8149e-03, PNorm = 173.3791, GNorm = 0.4020, lr_0 = 1.4398e-04
Loss = 2.8292e-03, PNorm = 173.3832, GNorm = 0.0348, lr_0 = 1.4388e-04
Loss = 1.7357e-03, PNorm = 173.3860, GNorm = 0.0410, lr_0 = 1.4378e-04
Loss = 2.1060e-03, PNorm = 173.3883, GNorm = 0.1543, lr_0 = 1.4368e-04
Loss = 1.4186e-03, PNorm = 173.3918, GNorm = 0.1448, lr_0 = 1.4359e-04
Loss = 1.1158e-03, PNorm = 173.3946, GNorm = 0.2129, lr_0 = 1.4349e-04
Loss = 1.7120e-03, PNorm = 173.3967, GNorm = 0.0610, lr_0 = 1.4339e-04
Loss = 1.1198e-03, PNorm = 173.3977, GNorm = 0.0776, lr_0 = 1.4329e-04
Loss = 3.6131e-03, PNorm = 173.3979, GNorm = 0.0727, lr_0 = 1.4319e-04
Loss = 1.9674e-03, PNorm = 173.4022, GNorm = 0.0957, lr_0 = 1.4310e-04
Loss = 3.2534e-03, PNorm = 173.4073, GNorm = 0.4266, lr_0 = 1.4300e-04
Loss = 9.3839e-04, PNorm = 173.4107, GNorm = 0.1318, lr_0 = 1.4290e-04
Loss = 1.2719e-03, PNorm = 173.4125, GNorm = 0.1356, lr_0 = 1.4280e-04
Loss = 2.3093e-03, PNorm = 173.4133, GNorm = 0.1354, lr_0 = 1.4270e-04
Loss = 1.2214e-03, PNorm = 173.4146, GNorm = 0.1645, lr_0 = 1.4261e-04
Loss = 1.2425e-03, PNorm = 173.4154, GNorm = 0.1144, lr_0 = 1.4251e-04
Loss = 3.0494e-03, PNorm = 173.4179, GNorm = 0.0541, lr_0 = 1.4241e-04
Loss = 1.0194e-03, PNorm = 173.4221, GNorm = 0.0746, lr_0 = 1.4231e-04
Loss = 8.0014e-04, PNorm = 173.4254, GNorm = 0.1959, lr_0 = 1.4222e-04
Loss = 1.6873e-03, PNorm = 173.4275, GNorm = 0.1259, lr_0 = 1.4212e-04
Loss = 1.2695e-03, PNorm = 173.4305, GNorm = 0.1817, lr_0 = 1.4202e-04
Loss = 1.9233e-03, PNorm = 173.4343, GNorm = 0.1135, lr_0 = 1.4192e-04
Loss = 1.4389e-03, PNorm = 173.4361, GNorm = 0.2390, lr_0 = 1.4183e-04
Loss = 3.8049e-03, PNorm = 173.4400, GNorm = 0.2201, lr_0 = 1.4173e-04
Loss = 1.7085e-03, PNorm = 173.4430, GNorm = 0.0998, lr_0 = 1.4163e-04
Loss = 1.1733e-03, PNorm = 173.4462, GNorm = 0.1308, lr_0 = 1.4153e-04
Loss = 9.7546e-04, PNorm = 173.4480, GNorm = 0.0726, lr_0 = 1.4144e-04
Loss = 5.3427e-03, PNorm = 173.4484, GNorm = 0.3311, lr_0 = 1.4134e-04
Loss = 2.6945e-03, PNorm = 173.4517, GNorm = 0.0697, lr_0 = 1.4124e-04
Loss = 1.1432e-03, PNorm = 173.4538, GNorm = 0.0808, lr_0 = 1.4115e-04
Loss = 1.8086e-03, PNorm = 173.4572, GNorm = 0.0980, lr_0 = 1.4105e-04
Loss = 1.2514e-03, PNorm = 173.4586, GNorm = 0.0466, lr_0 = 1.4095e-04
Loss = 1.6882e-03, PNorm = 173.4608, GNorm = 0.1296, lr_0 = 1.4086e-04
Loss = 1.1074e-03, PNorm = 173.4639, GNorm = 0.0459, lr_0 = 1.4076e-04
Loss = 1.5875e-03, PNorm = 173.4668, GNorm = 0.0950, lr_0 = 1.4066e-04
Loss = 1.3098e-03, PNorm = 173.4676, GNorm = 0.0628, lr_0 = 1.4057e-04
Loss = 2.0935e-03, PNorm = 173.4707, GNorm = 0.4914, lr_0 = 1.4047e-04
Loss = 1.3030e-03, PNorm = 173.4747, GNorm = 0.1016, lr_0 = 1.4038e-04
Loss = 1.7782e-03, PNorm = 173.4770, GNorm = 0.0696, lr_0 = 1.4028e-04
Loss = 1.0824e-03, PNorm = 173.4797, GNorm = 0.2656, lr_0 = 1.4018e-04
Loss = 1.0984e-03, PNorm = 173.4805, GNorm = 0.1003, lr_0 = 1.4009e-04
Loss = 2.3820e-03, PNorm = 173.4831, GNorm = 0.0970, lr_0 = 1.3999e-04
Loss = 2.2825e-03, PNorm = 173.4855, GNorm = 0.1263, lr_0 = 1.3990e-04
Loss = 3.6611e-03, PNorm = 173.4865, GNorm = 0.1417, lr_0 = 1.3980e-04
Loss = 2.3592e-03, PNorm = 173.4867, GNorm = 0.1641, lr_0 = 1.3970e-04
Loss = 1.0884e-03, PNorm = 173.4890, GNorm = 0.0488, lr_0 = 1.3961e-04
Loss = 1.3655e-03, PNorm = 173.4916, GNorm = 0.0710, lr_0 = 1.3951e-04
Loss = 1.2143e-03, PNorm = 173.4937, GNorm = 0.0593, lr_0 = 1.3942e-04
Loss = 2.8011e-03, PNorm = 173.4961, GNorm = 0.0897, lr_0 = 1.3932e-04
Loss = 1.2512e-03, PNorm = 173.4994, GNorm = 0.0986, lr_0 = 1.3923e-04
Loss = 1.5796e-03, PNorm = 173.5025, GNorm = 0.1107, lr_0 = 1.3913e-04
Loss = 1.8280e-03, PNorm = 173.5058, GNorm = 0.0706, lr_0 = 1.3904e-04
Loss = 2.0731e-03, PNorm = 173.5086, GNorm = 0.1503, lr_0 = 1.3894e-04
Validation mae = 0.278570
Epoch 26
Loss = 9.8806e-04, PNorm = 173.5119, GNorm = 0.0677, lr_0 = 1.3884e-04
Loss = 1.5583e-03, PNorm = 173.5118, GNorm = 0.1130, lr_0 = 1.3875e-04
Loss = 1.1551e-03, PNorm = 173.5115, GNorm = 0.0431, lr_0 = 1.3865e-04
Loss = 1.0678e-03, PNorm = 173.5135, GNorm = 0.1744, lr_0 = 1.3856e-04
Loss = 1.0371e-03, PNorm = 173.5156, GNorm = 0.1223, lr_0 = 1.3846e-04
Loss = 9.1409e-04, PNorm = 173.5171, GNorm = 0.1355, lr_0 = 1.3837e-04
Loss = 1.7776e-03, PNorm = 173.5193, GNorm = 0.2997, lr_0 = 1.3828e-04
Loss = 1.2816e-03, PNorm = 173.5212, GNorm = 0.1075, lr_0 = 1.3818e-04
Loss = 1.7046e-03, PNorm = 173.5224, GNorm = 0.1791, lr_0 = 1.3809e-04
Loss = 3.5166e-03, PNorm = 173.5230, GNorm = 0.0973, lr_0 = 1.3799e-04
Loss = 1.0602e-03, PNorm = 173.5242, GNorm = 0.1901, lr_0 = 1.3790e-04
Loss = 1.1569e-03, PNorm = 173.5265, GNorm = 0.1755, lr_0 = 1.3780e-04
Loss = 1.8745e-03, PNorm = 173.5302, GNorm = 0.0429, lr_0 = 1.3771e-04
Loss = 9.6588e-04, PNorm = 173.5344, GNorm = 0.1594, lr_0 = 1.3761e-04
Loss = 8.0634e-04, PNorm = 173.5362, GNorm = 0.0963, lr_0 = 1.3752e-04
Loss = 1.5535e-03, PNorm = 173.5392, GNorm = 0.0982, lr_0 = 1.3742e-04
Loss = 9.8226e-04, PNorm = 173.5414, GNorm = 0.0283, lr_0 = 1.3733e-04
Loss = 1.6660e-03, PNorm = 173.5429, GNorm = 0.0492, lr_0 = 1.3724e-04
Loss = 8.2003e-04, PNorm = 173.5442, GNorm = 0.0732, lr_0 = 1.3714e-04
Loss = 1.0422e-03, PNorm = 173.5446, GNorm = 0.0951, lr_0 = 1.3705e-04
Loss = 2.5439e-03, PNorm = 173.5460, GNorm = 0.0839, lr_0 = 1.3695e-04
Loss = 1.9430e-03, PNorm = 173.5490, GNorm = 0.2708, lr_0 = 1.3686e-04
Loss = 8.0412e-04, PNorm = 173.5525, GNorm = 0.1278, lr_0 = 1.3677e-04
Loss = 1.0044e-03, PNorm = 173.5547, GNorm = 0.0607, lr_0 = 1.3667e-04
Loss = 9.4284e-04, PNorm = 173.5550, GNorm = 0.2353, lr_0 = 1.3658e-04
Loss = 1.5008e-03, PNorm = 173.5567, GNorm = 0.0321, lr_0 = 1.3649e-04
Loss = 1.4728e-03, PNorm = 173.5577, GNorm = 0.1016, lr_0 = 1.3639e-04
Loss = 1.0291e-03, PNorm = 173.5596, GNorm = 0.1272, lr_0 = 1.3630e-04
Loss = 2.1150e-03, PNorm = 173.5622, GNorm = 0.0503, lr_0 = 1.3621e-04
Loss = 1.4685e-03, PNorm = 173.5643, GNorm = 0.0775, lr_0 = 1.3611e-04
Loss = 1.3938e-03, PNorm = 173.5658, GNorm = 0.0843, lr_0 = 1.3602e-04
Loss = 9.2448e-04, PNorm = 173.5676, GNorm = 0.1243, lr_0 = 1.3593e-04
Loss = 1.4891e-03, PNorm = 173.5704, GNorm = 0.0283, lr_0 = 1.3583e-04
Loss = 1.3177e-03, PNorm = 173.5715, GNorm = 0.1787, lr_0 = 1.3574e-04
Loss = 2.7637e-03, PNorm = 173.5734, GNorm = 0.1167, lr_0 = 1.3565e-04
Loss = 7.9100e-04, PNorm = 173.5743, GNorm = 0.1133, lr_0 = 1.3555e-04
Loss = 2.2736e-03, PNorm = 173.5750, GNorm = 0.1754, lr_0 = 1.3546e-04
Loss = 4.8742e-03, PNorm = 173.5762, GNorm = 0.5029, lr_0 = 1.3537e-04
Loss = 1.1312e-03, PNorm = 173.5787, GNorm = 0.0844, lr_0 = 1.3528e-04
Loss = 8.5181e-04, PNorm = 173.5808, GNorm = 0.0990, lr_0 = 1.3518e-04
Loss = 1.7524e-03, PNorm = 173.5821, GNorm = 0.3283, lr_0 = 1.3509e-04
Loss = 2.2411e-03, PNorm = 173.5838, GNorm = 0.0530, lr_0 = 1.3500e-04
Loss = 1.4529e-03, PNorm = 173.5876, GNorm = 0.3188, lr_0 = 1.3491e-04
Loss = 1.7000e-03, PNorm = 173.5910, GNorm = 0.0887, lr_0 = 1.3481e-04
Loss = 1.7866e-03, PNorm = 173.5936, GNorm = 0.1902, lr_0 = 1.3472e-04
Loss = 1.4281e-03, PNorm = 173.5958, GNorm = 0.2133, lr_0 = 1.3463e-04
Loss = 9.5423e-04, PNorm = 173.5987, GNorm = 0.0567, lr_0 = 1.3454e-04
Loss = 7.5160e-04, PNorm = 173.5997, GNorm = 0.0785, lr_0 = 1.3444e-04
Loss = 1.5214e-03, PNorm = 173.5997, GNorm = 0.0470, lr_0 = 1.3435e-04
Loss = 2.6746e-03, PNorm = 173.6019, GNorm = 0.0660, lr_0 = 1.3426e-04
Loss = 9.5190e-04, PNorm = 173.6056, GNorm = 0.0732, lr_0 = 1.3417e-04
Loss = 2.2977e-03, PNorm = 173.6089, GNorm = 0.1134, lr_0 = 1.3408e-04
Loss = 2.2463e-03, PNorm = 173.6108, GNorm = 0.0244, lr_0 = 1.3398e-04
Loss = 9.8232e-04, PNorm = 173.6130, GNorm = 0.0477, lr_0 = 1.3389e-04
Loss = 8.0115e-04, PNorm = 173.6151, GNorm = 0.0443, lr_0 = 1.3380e-04
Loss = 1.1198e-03, PNorm = 173.6156, GNorm = 0.2839, lr_0 = 1.3371e-04
Loss = 2.7188e-03, PNorm = 173.6182, GNorm = 0.1116, lr_0 = 1.3362e-04
Loss = 2.1833e-03, PNorm = 173.6185, GNorm = 0.1127, lr_0 = 1.3353e-04
Loss = 1.4450e-03, PNorm = 173.6185, GNorm = 0.2413, lr_0 = 1.3343e-04
Loss = 9.3835e-04, PNorm = 173.6202, GNorm = 0.0607, lr_0 = 1.3334e-04
Loss = 1.0109e-03, PNorm = 173.6218, GNorm = 0.0848, lr_0 = 1.3325e-04
Loss = 1.0546e-03, PNorm = 173.6228, GNorm = 0.1332, lr_0 = 1.3316e-04
Loss = 1.0979e-03, PNorm = 173.6244, GNorm = 0.1176, lr_0 = 1.3307e-04
Loss = 2.0057e-03, PNorm = 173.6268, GNorm = 0.1657, lr_0 = 1.3298e-04
Loss = 1.1098e-03, PNorm = 173.6291, GNorm = 0.0362, lr_0 = 1.3289e-04
Loss = 1.6969e-03, PNorm = 173.6312, GNorm = 0.1642, lr_0 = 1.3280e-04
Loss = 1.3477e-03, PNorm = 173.6329, GNorm = 0.1109, lr_0 = 1.3270e-04
Loss = 1.8943e-03, PNorm = 173.6338, GNorm = 0.0965, lr_0 = 1.3261e-04
Loss = 1.0150e-03, PNorm = 173.6359, GNorm = 0.1157, lr_0 = 1.3252e-04
Loss = 3.3366e-03, PNorm = 173.6379, GNorm = 0.2196, lr_0 = 1.3243e-04
Loss = 1.4143e-03, PNorm = 173.6400, GNorm = 0.0422, lr_0 = 1.3234e-04
Loss = 2.0249e-03, PNorm = 173.6414, GNorm = 0.0588, lr_0 = 1.3225e-04
Loss = 1.0438e-03, PNorm = 173.6429, GNorm = 0.0686, lr_0 = 1.3216e-04
Loss = 1.2517e-03, PNorm = 173.6454, GNorm = 0.1620, lr_0 = 1.3207e-04
Loss = 2.1459e-03, PNorm = 173.6476, GNorm = 0.1165, lr_0 = 1.3198e-04
Loss = 3.6644e-03, PNorm = 173.6485, GNorm = 0.7677, lr_0 = 1.3189e-04
Loss = 1.9605e-03, PNorm = 173.6502, GNorm = 0.1532, lr_0 = 1.3180e-04
Loss = 1.3873e-03, PNorm = 173.6534, GNorm = 0.1270, lr_0 = 1.3171e-04
Loss = 1.3181e-03, PNorm = 173.6556, GNorm = 0.1398, lr_0 = 1.3162e-04
Loss = 3.0705e-03, PNorm = 173.6566, GNorm = 0.1038, lr_0 = 1.3153e-04
Loss = 3.5338e-03, PNorm = 173.6588, GNorm = 0.1017, lr_0 = 1.3144e-04
Loss = 2.5339e-03, PNorm = 173.6612, GNorm = 0.3838, lr_0 = 1.3135e-04
Loss = 8.6538e-04, PNorm = 173.6641, GNorm = 0.0987, lr_0 = 1.3126e-04
Loss = 9.6685e-04, PNorm = 173.6661, GNorm = 0.0771, lr_0 = 1.3117e-04
Loss = 2.5013e-03, PNorm = 173.6685, GNorm = 0.1112, lr_0 = 1.3108e-04
Loss = 1.4536e-03, PNorm = 173.6707, GNorm = 0.0810, lr_0 = 1.3099e-04
Loss = 1.7928e-03, PNorm = 173.6722, GNorm = 0.0683, lr_0 = 1.3090e-04
Loss = 1.5568e-03, PNorm = 173.6744, GNorm = 0.0551, lr_0 = 1.3081e-04
Loss = 1.0327e-03, PNorm = 173.6776, GNorm = 0.1877, lr_0 = 1.3072e-04
Loss = 8.1426e-04, PNorm = 173.6810, GNorm = 0.0921, lr_0 = 1.3063e-04
Loss = 1.2108e-03, PNorm = 173.6836, GNorm = 0.0601, lr_0 = 1.3054e-04
Loss = 1.3318e-03, PNorm = 173.6859, GNorm = 0.0518, lr_0 = 1.3045e-04
Loss = 2.2086e-03, PNorm = 173.6880, GNorm = 0.4050, lr_0 = 1.3036e-04
Loss = 1.1831e-03, PNorm = 173.6898, GNorm = 0.1429, lr_0 = 1.3027e-04
Loss = 1.1005e-03, PNorm = 173.6898, GNorm = 0.0309, lr_0 = 1.3018e-04
Loss = 1.5501e-03, PNorm = 173.6909, GNorm = 0.0660, lr_0 = 1.3009e-04
Loss = 1.5972e-03, PNorm = 173.6923, GNorm = 0.0422, lr_0 = 1.3000e-04
Loss = 8.1886e-04, PNorm = 173.6937, GNorm = 0.0974, lr_0 = 1.2992e-04
Loss = 2.2145e-03, PNorm = 173.6952, GNorm = 0.1538, lr_0 = 1.2983e-04
Loss = 2.7666e-03, PNorm = 173.6980, GNorm = 0.2323, lr_0 = 1.2974e-04
Loss = 1.4943e-03, PNorm = 173.7005, GNorm = 0.1066, lr_0 = 1.2965e-04
Loss = 1.0354e-03, PNorm = 173.7038, GNorm = 0.1465, lr_0 = 1.2956e-04
Loss = 7.3250e-04, PNorm = 173.7063, GNorm = 0.1486, lr_0 = 1.2947e-04
Loss = 1.5154e-03, PNorm = 173.7088, GNorm = 0.0717, lr_0 = 1.2938e-04
Loss = 1.3865e-03, PNorm = 173.7096, GNorm = 0.0457, lr_0 = 1.2929e-04
Loss = 1.2341e-03, PNorm = 173.7117, GNorm = 0.1060, lr_0 = 1.2921e-04
Loss = 2.7031e-03, PNorm = 173.7110, GNorm = 0.1080, lr_0 = 1.2912e-04
Loss = 1.0881e-03, PNorm = 173.7127, GNorm = 0.2195, lr_0 = 1.2903e-04
Loss = 9.5462e-04, PNorm = 173.7138, GNorm = 0.1329, lr_0 = 1.2894e-04
Loss = 8.5101e-04, PNorm = 173.7161, GNorm = 0.0838, lr_0 = 1.2885e-04
Loss = 1.0467e-03, PNorm = 173.7182, GNorm = 0.0690, lr_0 = 1.2876e-04
Loss = 9.8064e-04, PNorm = 173.7211, GNorm = 0.1263, lr_0 = 1.2867e-04
Loss = 1.8284e-03, PNorm = 173.7235, GNorm = 0.1237, lr_0 = 1.2859e-04
Loss = 3.2989e-03, PNorm = 173.7255, GNorm = 0.0636, lr_0 = 1.2850e-04
Loss = 1.2969e-03, PNorm = 173.7269, GNorm = 0.1561, lr_0 = 1.2841e-04
Loss = 1.0591e-03, PNorm = 173.7289, GNorm = 0.0663, lr_0 = 1.2832e-04
Loss = 1.2348e-03, PNorm = 173.7301, GNorm = 0.1241, lr_0 = 1.2823e-04
Loss = 2.1309e-03, PNorm = 173.7311, GNorm = 0.0923, lr_0 = 1.2815e-04
Loss = 1.4883e-03, PNorm = 173.7325, GNorm = 0.0786, lr_0 = 1.2806e-04
Loss = 1.0138e-03, PNorm = 173.7328, GNorm = 0.1458, lr_0 = 1.2797e-04
Validation mae = 0.277975
Epoch 27
Loss = 1.5379e-03, PNorm = 173.7327, GNorm = 0.1490, lr_0 = 1.2788e-04
Loss = 8.8302e-04, PNorm = 173.7343, GNorm = 0.0371, lr_0 = 1.2780e-04
Loss = 7.8844e-04, PNorm = 173.7365, GNorm = 0.1927, lr_0 = 1.2771e-04
Loss = 1.0746e-03, PNorm = 173.7382, GNorm = 0.0642, lr_0 = 1.2762e-04
Loss = 1.0626e-03, PNorm = 173.7407, GNorm = 0.0938, lr_0 = 1.2753e-04
Loss = 2.6614e-03, PNorm = 173.7428, GNorm = 0.5169, lr_0 = 1.2745e-04
Loss = 9.4219e-04, PNorm = 173.7432, GNorm = 0.3038, lr_0 = 1.2736e-04
Loss = 8.7660e-04, PNorm = 173.7448, GNorm = 0.0956, lr_0 = 1.2727e-04
Loss = 1.1191e-03, PNorm = 173.7471, GNorm = 0.1253, lr_0 = 1.2718e-04
Loss = 9.8495e-04, PNorm = 173.7502, GNorm = 0.0573, lr_0 = 1.2710e-04
Loss = 1.5717e-03, PNorm = 173.7513, GNorm = 0.1506, lr_0 = 1.2701e-04
Loss = 1.0640e-03, PNorm = 173.7533, GNorm = 0.0775, lr_0 = 1.2692e-04
Loss = 8.6694e-04, PNorm = 173.7541, GNorm = 0.0465, lr_0 = 1.2684e-04
Loss = 1.4872e-03, PNorm = 173.7564, GNorm = 0.1278, lr_0 = 1.2675e-04
Loss = 7.7882e-04, PNorm = 173.7571, GNorm = 0.1110, lr_0 = 1.2666e-04
Loss = 9.0762e-04, PNorm = 173.7593, GNorm = 0.0905, lr_0 = 1.2658e-04
Loss = 3.2498e-03, PNorm = 173.7615, GNorm = 0.1836, lr_0 = 1.2649e-04
Loss = 1.4467e-03, PNorm = 173.7637, GNorm = 0.1416, lr_0 = 1.2640e-04
Loss = 1.9902e-03, PNorm = 173.7649, GNorm = 0.0914, lr_0 = 1.2632e-04
Loss = 2.4158e-03, PNorm = 173.7665, GNorm = 0.5377, lr_0 = 1.2623e-04
Loss = 1.1349e-03, PNorm = 173.7672, GNorm = 0.2280, lr_0 = 1.2614e-04
Loss = 1.0503e-03, PNorm = 173.7699, GNorm = 0.1972, lr_0 = 1.2606e-04
Loss = 2.8333e-03, PNorm = 173.7718, GNorm = 0.1455, lr_0 = 1.2597e-04
Loss = 2.2048e-03, PNorm = 173.7716, GNorm = 0.0819, lr_0 = 1.2588e-04
Loss = 7.6784e-04, PNorm = 173.7721, GNorm = 0.1053, lr_0 = 1.2580e-04
Loss = 7.0998e-04, PNorm = 173.7748, GNorm = 0.1313, lr_0 = 1.2571e-04
Loss = 8.5487e-04, PNorm = 173.7772, GNorm = 0.1332, lr_0 = 1.2563e-04
Loss = 1.6582e-03, PNorm = 173.7781, GNorm = 0.0885, lr_0 = 1.2554e-04
Loss = 1.2183e-03, PNorm = 173.7785, GNorm = 0.0981, lr_0 = 1.2545e-04
Loss = 1.1766e-03, PNorm = 173.7804, GNorm = 0.0596, lr_0 = 1.2537e-04
Loss = 1.3821e-03, PNorm = 173.7828, GNorm = 0.0519, lr_0 = 1.2528e-04
Loss = 7.9421e-04, PNorm = 173.7850, GNorm = 0.0717, lr_0 = 1.2520e-04
Loss = 2.8834e-03, PNorm = 173.7873, GNorm = 0.1088, lr_0 = 1.2511e-04
Loss = 9.8962e-04, PNorm = 173.7892, GNorm = 0.1791, lr_0 = 1.2502e-04
Loss = 1.7643e-03, PNorm = 173.7902, GNorm = 0.2190, lr_0 = 1.2494e-04
Loss = 1.7322e-03, PNorm = 173.7917, GNorm = 0.2387, lr_0 = 1.2485e-04
Loss = 1.8088e-03, PNorm = 173.7945, GNorm = 0.0559, lr_0 = 1.2477e-04
Loss = 1.4721e-03, PNorm = 173.7956, GNorm = 0.0782, lr_0 = 1.2468e-04
Loss = 1.4194e-03, PNorm = 173.7972, GNorm = 0.0963, lr_0 = 1.2460e-04
Loss = 7.3454e-04, PNorm = 173.7984, GNorm = 0.0431, lr_0 = 1.2451e-04
Loss = 8.4525e-04, PNorm = 173.7995, GNorm = 0.1079, lr_0 = 1.2443e-04
Loss = 8.1662e-04, PNorm = 173.8012, GNorm = 0.0968, lr_0 = 1.2434e-04
Loss = 1.7915e-03, PNorm = 173.8033, GNorm = 0.0995, lr_0 = 1.2426e-04
Loss = 1.2918e-03, PNorm = 173.8049, GNorm = 0.0634, lr_0 = 1.2417e-04
Loss = 1.4996e-03, PNorm = 173.8051, GNorm = 0.1523, lr_0 = 1.2409e-04
Loss = 4.1014e-03, PNorm = 173.8060, GNorm = 0.1317, lr_0 = 1.2400e-04
Loss = 1.3412e-03, PNorm = 173.8069, GNorm = 0.0564, lr_0 = 1.2392e-04
Loss = 9.4829e-04, PNorm = 173.8088, GNorm = 0.0636, lr_0 = 1.2383e-04
Loss = 1.0836e-03, PNorm = 173.8105, GNorm = 0.1017, lr_0 = 1.2375e-04
Loss = 8.1859e-04, PNorm = 173.8126, GNorm = 0.1956, lr_0 = 1.2366e-04
Loss = 7.6586e-04, PNorm = 173.8138, GNorm = 0.0574, lr_0 = 1.2358e-04
Loss = 1.4946e-03, PNorm = 173.8143, GNorm = 0.0564, lr_0 = 1.2349e-04
Loss = 7.9823e-04, PNorm = 173.8147, GNorm = 0.0576, lr_0 = 1.2341e-04
Loss = 3.1107e-03, PNorm = 173.8159, GNorm = 0.3280, lr_0 = 1.2332e-04
Loss = 2.7945e-03, PNorm = 173.8174, GNorm = 0.1585, lr_0 = 1.2324e-04
Loss = 2.6053e-03, PNorm = 173.8173, GNorm = 0.0887, lr_0 = 1.2315e-04
Loss = 1.1532e-03, PNorm = 173.8183, GNorm = 0.0836, lr_0 = 1.2307e-04
Loss = 1.6849e-03, PNorm = 173.8193, GNorm = 0.1674, lr_0 = 1.2298e-04
Loss = 1.7775e-03, PNorm = 173.8201, GNorm = 0.1981, lr_0 = 1.2290e-04
Loss = 1.8554e-03, PNorm = 173.8222, GNorm = 0.0927, lr_0 = 1.2282e-04
Loss = 1.0017e-03, PNorm = 173.8241, GNorm = 0.0453, lr_0 = 1.2273e-04
Loss = 7.4270e-04, PNorm = 173.8260, GNorm = 0.1144, lr_0 = 1.2265e-04
Loss = 1.0852e-03, PNorm = 173.8277, GNorm = 0.0499, lr_0 = 1.2256e-04
Loss = 3.7342e-03, PNorm = 173.8297, GNorm = 0.1112, lr_0 = 1.2248e-04
Loss = 1.8233e-03, PNorm = 173.8317, GNorm = 0.1015, lr_0 = 1.2240e-04
Loss = 7.1759e-04, PNorm = 173.8332, GNorm = 0.0487, lr_0 = 1.2231e-04
Loss = 2.1440e-03, PNorm = 173.8352, GNorm = 0.0997, lr_0 = 1.2223e-04
Loss = 1.5777e-03, PNorm = 173.8378, GNorm = 0.0963, lr_0 = 1.2214e-04
Loss = 8.0637e-04, PNorm = 173.8386, GNorm = 0.0391, lr_0 = 1.2206e-04
Loss = 2.2869e-03, PNorm = 173.8389, GNorm = 0.0542, lr_0 = 1.2198e-04
Loss = 7.4589e-04, PNorm = 173.8401, GNorm = 0.1057, lr_0 = 1.2189e-04
Loss = 1.8187e-03, PNorm = 173.8417, GNorm = 0.1501, lr_0 = 1.2181e-04
Loss = 1.8838e-03, PNorm = 173.8423, GNorm = 0.0985, lr_0 = 1.2173e-04
Loss = 1.3925e-03, PNorm = 173.8435, GNorm = 0.1428, lr_0 = 1.2164e-04
Loss = 8.4233e-04, PNorm = 173.8460, GNorm = 0.1323, lr_0 = 1.2156e-04
Loss = 1.3731e-03, PNorm = 173.8480, GNorm = 0.1081, lr_0 = 1.2148e-04
Loss = 6.1963e-04, PNorm = 173.8495, GNorm = 0.0183, lr_0 = 1.2139e-04
Loss = 7.2601e-04, PNorm = 173.8514, GNorm = 0.0638, lr_0 = 1.2131e-04
Loss = 7.6069e-04, PNorm = 173.8529, GNorm = 0.0598, lr_0 = 1.2123e-04
Loss = 1.1699e-03, PNorm = 173.8529, GNorm = 0.1543, lr_0 = 1.2114e-04
Loss = 1.7055e-03, PNorm = 173.8537, GNorm = 0.2589, lr_0 = 1.2106e-04
Loss = 1.7898e-03, PNorm = 173.8542, GNorm = 0.0601, lr_0 = 1.2098e-04
Loss = 3.4293e-03, PNorm = 173.8579, GNorm = 0.2490, lr_0 = 1.2090e-04
Loss = 1.4186e-03, PNorm = 173.8608, GNorm = 0.0721, lr_0 = 1.2081e-04
Loss = 2.4888e-03, PNorm = 173.8633, GNorm = 0.0311, lr_0 = 1.2073e-04
Loss = 1.8525e-03, PNorm = 173.8645, GNorm = 0.2254, lr_0 = 1.2065e-04
Loss = 8.2727e-04, PNorm = 173.8653, GNorm = 0.0910, lr_0 = 1.2056e-04
Loss = 2.1497e-03, PNorm = 173.8660, GNorm = 0.0419, lr_0 = 1.2048e-04
Loss = 1.9020e-03, PNorm = 173.8656, GNorm = 0.1312, lr_0 = 1.2040e-04
Loss = 1.9978e-03, PNorm = 173.8679, GNorm = 0.1931, lr_0 = 1.2032e-04
Loss = 7.3322e-04, PNorm = 173.8705, GNorm = 0.1193, lr_0 = 1.2023e-04
Loss = 6.5112e-04, PNorm = 173.8731, GNorm = 0.0442, lr_0 = 1.2015e-04
Loss = 1.2312e-03, PNorm = 173.8747, GNorm = 0.0912, lr_0 = 1.2007e-04
Loss = 9.0189e-04, PNorm = 173.8760, GNorm = 0.0623, lr_0 = 1.1999e-04
Loss = 7.7228e-04, PNorm = 173.8765, GNorm = 0.0596, lr_0 = 1.1991e-04
Loss = 8.2274e-04, PNorm = 173.8771, GNorm = 0.1120, lr_0 = 1.1982e-04
Loss = 1.8486e-03, PNorm = 173.8782, GNorm = 0.1150, lr_0 = 1.1974e-04
Loss = 8.0153e-04, PNorm = 173.8809, GNorm = 0.0358, lr_0 = 1.1966e-04
Loss = 8.3368e-04, PNorm = 173.8832, GNorm = 0.1297, lr_0 = 1.1958e-04
Loss = 7.8390e-04, PNorm = 173.8864, GNorm = 0.1157, lr_0 = 1.1950e-04
Loss = 1.3502e-03, PNorm = 173.8888, GNorm = 0.1285, lr_0 = 1.1941e-04
Loss = 1.5643e-03, PNorm = 173.8915, GNorm = 0.1593, lr_0 = 1.1933e-04
Loss = 8.5106e-04, PNorm = 173.8937, GNorm = 0.0594, lr_0 = 1.1925e-04
Loss = 1.0333e-03, PNorm = 173.8963, GNorm = 0.0876, lr_0 = 1.1917e-04
Loss = 6.3013e-04, PNorm = 173.8983, GNorm = 0.0268, lr_0 = 1.1909e-04
Loss = 1.4640e-03, PNorm = 173.8991, GNorm = 0.1554, lr_0 = 1.1901e-04
Loss = 7.1576e-04, PNorm = 173.8993, GNorm = 0.0971, lr_0 = 1.1892e-04
Loss = 3.8684e-03, PNorm = 173.9000, GNorm = 0.1392, lr_0 = 1.1884e-04
Loss = 9.3307e-04, PNorm = 173.9020, GNorm = 0.0709, lr_0 = 1.1876e-04
Loss = 1.0176e-03, PNorm = 173.9028, GNorm = 0.0998, lr_0 = 1.1868e-04
Loss = 1.7140e-03, PNorm = 173.9041, GNorm = 0.1055, lr_0 = 1.1860e-04
Loss = 9.1078e-04, PNorm = 173.9066, GNorm = 0.0383, lr_0 = 1.1852e-04
Loss = 9.8767e-04, PNorm = 173.9094, GNorm = 0.1107, lr_0 = 1.1844e-04
Loss = 1.8331e-03, PNorm = 173.9119, GNorm = 0.1115, lr_0 = 1.1835e-04
Loss = 1.3546e-03, PNorm = 173.9147, GNorm = 0.1564, lr_0 = 1.1827e-04
Loss = 7.8848e-04, PNorm = 173.9165, GNorm = 0.1577, lr_0 = 1.1819e-04
Loss = 1.3830e-03, PNorm = 173.9174, GNorm = 0.1936, lr_0 = 1.1811e-04
Loss = 6.3560e-04, PNorm = 173.9185, GNorm = 0.0373, lr_0 = 1.1803e-04
Loss = 1.1315e-03, PNorm = 173.9192, GNorm = 0.0756, lr_0 = 1.1795e-04
Loss = 1.0422e-03, PNorm = 173.9220, GNorm = 0.0806, lr_0 = 1.1787e-04
Validation mae = 0.277993
Epoch 28
Loss = 1.0519e-03, PNorm = 173.9230, GNorm = 0.1635, lr_0 = 1.1779e-04
Loss = 3.4669e-03, PNorm = 173.9233, GNorm = 0.0215, lr_0 = 1.1771e-04
Loss = 1.1081e-03, PNorm = 173.9240, GNorm = 0.1418, lr_0 = 1.1763e-04
Loss = 2.0831e-03, PNorm = 173.9249, GNorm = 0.1355, lr_0 = 1.1755e-04
Loss = 1.1934e-03, PNorm = 173.9267, GNorm = 0.1227, lr_0 = 1.1747e-04
Loss = 1.0837e-03, PNorm = 173.9290, GNorm = 0.0678, lr_0 = 1.1739e-04
Loss = 6.9236e-04, PNorm = 173.9306, GNorm = 0.1151, lr_0 = 1.1730e-04
Loss = 9.0237e-04, PNorm = 173.9313, GNorm = 0.0961, lr_0 = 1.1722e-04
Loss = 9.1636e-04, PNorm = 173.9317, GNorm = 0.1242, lr_0 = 1.1714e-04
Loss = 6.5966e-04, PNorm = 173.9320, GNorm = 0.0339, lr_0 = 1.1706e-04
Loss = 1.2806e-03, PNorm = 173.9319, GNorm = 0.1489, lr_0 = 1.1698e-04
Loss = 1.1807e-03, PNorm = 173.9331, GNorm = 0.0632, lr_0 = 1.1690e-04
Loss = 9.5369e-04, PNorm = 173.9339, GNorm = 0.1014, lr_0 = 1.1682e-04
Loss = 7.5903e-04, PNorm = 173.9356, GNorm = 0.0921, lr_0 = 1.1674e-04
Loss = 1.2244e-03, PNorm = 173.9375, GNorm = 0.5449, lr_0 = 1.1666e-04
Loss = 7.4827e-04, PNorm = 173.9410, GNorm = 0.0655, lr_0 = 1.1658e-04
Loss = 1.2491e-03, PNorm = 173.9423, GNorm = 0.2006, lr_0 = 1.1650e-04
Loss = 2.1870e-03, PNorm = 173.9421, GNorm = 0.5254, lr_0 = 1.1642e-04
Loss = 1.4056e-03, PNorm = 173.9441, GNorm = 0.1218, lr_0 = 1.1634e-04
Loss = 5.7983e-04, PNorm = 173.9451, GNorm = 0.1199, lr_0 = 1.1626e-04
Loss = 5.0599e-04, PNorm = 173.9462, GNorm = 0.1105, lr_0 = 1.1618e-04
Loss = 1.6766e-03, PNorm = 173.9473, GNorm = 0.0950, lr_0 = 1.1611e-04
Loss = 1.2028e-03, PNorm = 173.9492, GNorm = 0.1297, lr_0 = 1.1603e-04
Loss = 6.7962e-04, PNorm = 173.9494, GNorm = 0.0999, lr_0 = 1.1595e-04
Loss = 1.1467e-03, PNorm = 173.9501, GNorm = 0.0559, lr_0 = 1.1587e-04
Loss = 6.8021e-04, PNorm = 173.9511, GNorm = 0.1290, lr_0 = 1.1579e-04
Loss = 6.2782e-04, PNorm = 173.9535, GNorm = 0.0665, lr_0 = 1.1571e-04
Loss = 1.2148e-03, PNorm = 173.9560, GNorm = 0.0757, lr_0 = 1.1563e-04
Loss = 6.6386e-04, PNorm = 173.9569, GNorm = 0.0897, lr_0 = 1.1555e-04
Loss = 9.6887e-04, PNorm = 173.9582, GNorm = 0.1183, lr_0 = 1.1547e-04
Loss = 5.7170e-04, PNorm = 173.9603, GNorm = 0.0479, lr_0 = 1.1539e-04
Loss = 1.3077e-03, PNorm = 173.9617, GNorm = 0.0847, lr_0 = 1.1531e-04
Loss = 4.8648e-03, PNorm = 173.9621, GNorm = 0.0896, lr_0 = 1.1523e-04
Loss = 1.1663e-03, PNorm = 173.9644, GNorm = 0.1783, lr_0 = 1.1515e-04
Loss = 8.2334e-04, PNorm = 173.9650, GNorm = 0.0481, lr_0 = 1.1508e-04
Loss = 6.1411e-04, PNorm = 173.9659, GNorm = 0.0393, lr_0 = 1.1500e-04
Loss = 5.9063e-04, PNorm = 173.9670, GNorm = 0.0389, lr_0 = 1.1492e-04
Loss = 1.3177e-03, PNorm = 173.9683, GNorm = 0.0758, lr_0 = 1.1484e-04
Loss = 8.1890e-04, PNorm = 173.9709, GNorm = 0.1446, lr_0 = 1.1476e-04
Loss = 7.3936e-04, PNorm = 173.9716, GNorm = 0.0915, lr_0 = 1.1468e-04
Loss = 7.6886e-04, PNorm = 173.9724, GNorm = 0.0305, lr_0 = 1.1460e-04
Loss = 9.0691e-04, PNorm = 173.9733, GNorm = 0.0334, lr_0 = 1.1452e-04
Loss = 1.5132e-03, PNorm = 173.9747, GNorm = 0.0825, lr_0 = 1.1445e-04
Loss = 7.1494e-04, PNorm = 173.9757, GNorm = 0.0589, lr_0 = 1.1437e-04
Loss = 7.5425e-04, PNorm = 173.9766, GNorm = 0.0915, lr_0 = 1.1429e-04
Loss = 2.7958e-03, PNorm = 173.9776, GNorm = 0.6019, lr_0 = 1.1421e-04
Loss = 1.0692e-03, PNorm = 173.9779, GNorm = 0.0510, lr_0 = 1.1413e-04
Loss = 7.3951e-04, PNorm = 173.9793, GNorm = 0.0668, lr_0 = 1.1405e-04
Loss = 6.6058e-04, PNorm = 173.9810, GNorm = 0.0575, lr_0 = 1.1398e-04
Loss = 1.3817e-03, PNorm = 173.9817, GNorm = 0.1830, lr_0 = 1.1390e-04
Loss = 2.8967e-03, PNorm = 173.9812, GNorm = 0.0423, lr_0 = 1.1382e-04
Loss = 6.4825e-04, PNorm = 173.9826, GNorm = 0.0410, lr_0 = 1.1374e-04
Loss = 2.6228e-03, PNorm = 173.9837, GNorm = 0.1042, lr_0 = 1.1366e-04
Loss = 5.9181e-04, PNorm = 173.9849, GNorm = 0.1038, lr_0 = 1.1359e-04
Loss = 1.2439e-03, PNorm = 173.9870, GNorm = 0.0658, lr_0 = 1.1351e-04
Loss = 1.2235e-03, PNorm = 173.9897, GNorm = 0.0549, lr_0 = 1.1343e-04
Loss = 5.7736e-04, PNorm = 173.9911, GNorm = 0.0905, lr_0 = 1.1335e-04
Loss = 1.5231e-03, PNorm = 173.9926, GNorm = 0.1378, lr_0 = 1.1328e-04
Loss = 2.1704e-03, PNorm = 173.9935, GNorm = 0.0746, lr_0 = 1.1320e-04
Loss = 8.6864e-04, PNorm = 173.9936, GNorm = 0.0828, lr_0 = 1.1312e-04
Loss = 1.4755e-03, PNorm = 173.9948, GNorm = 0.0607, lr_0 = 1.1304e-04
Loss = 7.5412e-04, PNorm = 173.9946, GNorm = 0.1172, lr_0 = 1.1297e-04
Loss = 1.4351e-03, PNorm = 173.9958, GNorm = 0.0650, lr_0 = 1.1289e-04
Loss = 3.1408e-03, PNorm = 173.9969, GNorm = 0.0400, lr_0 = 1.1281e-04
Loss = 3.0129e-03, PNorm = 173.9994, GNorm = 0.1585, lr_0 = 1.1273e-04
Loss = 2.6220e-03, PNorm = 174.0005, GNorm = 0.0592, lr_0 = 1.1266e-04
Loss = 8.1667e-04, PNorm = 174.0014, GNorm = 0.1194, lr_0 = 1.1258e-04
Loss = 1.1761e-03, PNorm = 174.0031, GNorm = 0.0609, lr_0 = 1.1250e-04
Loss = 1.7879e-03, PNorm = 174.0054, GNorm = 0.1008, lr_0 = 1.1243e-04
Loss = 1.6812e-03, PNorm = 174.0075, GNorm = 0.1619, lr_0 = 1.1235e-04
Loss = 1.3464e-03, PNorm = 174.0090, GNorm = 0.0564, lr_0 = 1.1227e-04
Loss = 7.6762e-04, PNorm = 174.0098, GNorm = 0.0878, lr_0 = 1.1219e-04
Loss = 7.3816e-04, PNorm = 174.0108, GNorm = 0.1147, lr_0 = 1.1212e-04
Loss = 6.0408e-04, PNorm = 174.0128, GNorm = 0.0370, lr_0 = 1.1204e-04
Loss = 8.4654e-04, PNorm = 174.0148, GNorm = 0.0783, lr_0 = 1.1196e-04
Loss = 1.0819e-03, PNorm = 174.0166, GNorm = 0.1918, lr_0 = 1.1189e-04
Loss = 7.9509e-04, PNorm = 174.0174, GNorm = 0.1930, lr_0 = 1.1181e-04
Loss = 6.7131e-04, PNorm = 174.0189, GNorm = 0.1459, lr_0 = 1.1173e-04
Loss = 7.4889e-04, PNorm = 174.0196, GNorm = 0.0759, lr_0 = 1.1166e-04
Loss = 2.0650e-03, PNorm = 174.0212, GNorm = 0.1097, lr_0 = 1.1158e-04
Loss = 1.1370e-03, PNorm = 174.0232, GNorm = 0.0680, lr_0 = 1.1150e-04
Loss = 1.2850e-03, PNorm = 174.0251, GNorm = 0.0702, lr_0 = 1.1143e-04
Loss = 1.5652e-03, PNorm = 174.0269, GNorm = 0.2500, lr_0 = 1.1135e-04
Loss = 1.3150e-03, PNorm = 174.0292, GNorm = 0.0690, lr_0 = 1.1128e-04
Loss = 9.7645e-04, PNorm = 174.0313, GNorm = 0.0917, lr_0 = 1.1120e-04
Loss = 6.1345e-04, PNorm = 174.0337, GNorm = 0.0818, lr_0 = 1.1112e-04
Loss = 8.1652e-04, PNorm = 174.0336, GNorm = 0.0579, lr_0 = 1.1105e-04
Loss = 2.4652e-03, PNorm = 174.0325, GNorm = 0.1755, lr_0 = 1.1097e-04
Loss = 2.2744e-03, PNorm = 174.0338, GNorm = 0.0793, lr_0 = 1.1089e-04
Loss = 9.6347e-04, PNorm = 174.0368, GNorm = 0.1091, lr_0 = 1.1082e-04
Loss = 2.2745e-03, PNorm = 174.0391, GNorm = 0.0872, lr_0 = 1.1074e-04
Loss = 5.9754e-04, PNorm = 174.0394, GNorm = 0.0625, lr_0 = 1.1067e-04
Loss = 1.5565e-03, PNorm = 174.0400, GNorm = 0.0369, lr_0 = 1.1059e-04
Loss = 1.0130e-03, PNorm = 174.0404, GNorm = 0.1274, lr_0 = 1.1052e-04
Loss = 7.7341e-04, PNorm = 174.0428, GNorm = 0.0321, lr_0 = 1.1044e-04
Loss = 2.6501e-03, PNorm = 174.0454, GNorm = 0.0982, lr_0 = 1.1036e-04
Loss = 3.0099e-03, PNorm = 174.0464, GNorm = 0.2805, lr_0 = 1.1029e-04
Loss = 7.7615e-04, PNorm = 174.0465, GNorm = 0.0520, lr_0 = 1.1021e-04
Loss = 1.3777e-03, PNorm = 174.0487, GNorm = 0.1653, lr_0 = 1.1014e-04
Loss = 1.9712e-03, PNorm = 174.0506, GNorm = 0.1587, lr_0 = 1.1006e-04
Loss = 3.4382e-03, PNorm = 174.0539, GNorm = 0.0259, lr_0 = 1.0999e-04
Loss = 1.3310e-03, PNorm = 174.0563, GNorm = 0.2540, lr_0 = 1.0991e-04
Loss = 1.6771e-03, PNorm = 174.0575, GNorm = 0.0352, lr_0 = 1.0984e-04
Loss = 1.7332e-03, PNorm = 174.0598, GNorm = 0.0779, lr_0 = 1.0976e-04
Loss = 2.0341e-03, PNorm = 174.0608, GNorm = 0.1142, lr_0 = 1.0969e-04
Loss = 1.4136e-03, PNorm = 174.0629, GNorm = 0.1499, lr_0 = 1.0961e-04
Loss = 8.1240e-04, PNorm = 174.0654, GNorm = 0.0964, lr_0 = 1.0954e-04
Loss = 1.2281e-03, PNorm = 174.0675, GNorm = 0.0553, lr_0 = 1.0946e-04
Loss = 6.4355e-04, PNorm = 174.0689, GNorm = 0.0756, lr_0 = 1.0939e-04
Loss = 2.0006e-03, PNorm = 174.0702, GNorm = 0.5546, lr_0 = 1.0931e-04
Loss = 7.0862e-04, PNorm = 174.0709, GNorm = 0.0932, lr_0 = 1.0924e-04
Loss = 5.7852e-04, PNorm = 174.0721, GNorm = 0.0489, lr_0 = 1.0916e-04
Loss = 8.8242e-04, PNorm = 174.0748, GNorm = 0.1160, lr_0 = 1.0909e-04
Loss = 1.1461e-03, PNorm = 174.0771, GNorm = 0.0698, lr_0 = 1.0901e-04
Loss = 1.2786e-03, PNorm = 174.0792, GNorm = 0.2415, lr_0 = 1.0894e-04
Loss = 9.5792e-04, PNorm = 174.0799, GNorm = 0.0457, lr_0 = 1.0886e-04
Loss = 7.3245e-04, PNorm = 174.0821, GNorm = 0.1829, lr_0 = 1.0879e-04
Loss = 1.0139e-03, PNorm = 174.0827, GNorm = 0.0640, lr_0 = 1.0871e-04
Loss = 1.9835e-03, PNorm = 174.0849, GNorm = 0.1425, lr_0 = 1.0864e-04
Loss = 6.6312e-04, PNorm = 174.0859, GNorm = 0.0295, lr_0 = 1.0856e-04
Validation mae = 0.278378
Epoch 29
Loss = 1.1809e-03, PNorm = 174.0859, GNorm = 0.0725, lr_0 = 1.0849e-04
Loss = 1.2295e-03, PNorm = 174.0866, GNorm = 0.1461, lr_0 = 1.0841e-04
Loss = 1.4676e-03, PNorm = 174.0870, GNorm = 0.2684, lr_0 = 1.0834e-04
Loss = 1.2055e-03, PNorm = 174.0880, GNorm = 0.1481, lr_0 = 1.0827e-04
Loss = 1.6836e-03, PNorm = 174.0872, GNorm = 0.1123, lr_0 = 1.0819e-04
Loss = 5.4631e-04, PNorm = 174.0873, GNorm = 0.0854, lr_0 = 1.0812e-04
Loss = 1.3807e-03, PNorm = 174.0881, GNorm = 0.1242, lr_0 = 1.0804e-04
Loss = 5.8441e-04, PNorm = 174.0900, GNorm = 0.0778, lr_0 = 1.0797e-04
Loss = 1.6014e-03, PNorm = 174.0916, GNorm = 0.0494, lr_0 = 1.0790e-04
Loss = 5.8306e-04, PNorm = 174.0928, GNorm = 0.0596, lr_0 = 1.0782e-04
Loss = 1.3717e-03, PNorm = 174.0941, GNorm = 0.0367, lr_0 = 1.0775e-04
Loss = 1.3882e-03, PNorm = 174.0952, GNorm = 0.4723, lr_0 = 1.0767e-04
Loss = 1.0497e-03, PNorm = 174.0966, GNorm = 0.0315, lr_0 = 1.0760e-04
Loss = 1.4686e-03, PNorm = 174.0989, GNorm = 0.1938, lr_0 = 1.0753e-04
Loss = 1.0478e-03, PNorm = 174.0998, GNorm = 0.1044, lr_0 = 1.0745e-04
Loss = 4.8579e-04, PNorm = 174.1006, GNorm = 0.1113, lr_0 = 1.0738e-04
Loss = 1.0661e-03, PNorm = 174.1019, GNorm = 0.0566, lr_0 = 1.0731e-04
Loss = 1.5909e-03, PNorm = 174.1023, GNorm = 0.1301, lr_0 = 1.0723e-04
Loss = 1.6189e-03, PNorm = 174.1016, GNorm = 0.0883, lr_0 = 1.0716e-04
Loss = 1.1628e-03, PNorm = 174.1018, GNorm = 0.0311, lr_0 = 1.0709e-04
Loss = 2.8766e-03, PNorm = 174.1035, GNorm = 0.0888, lr_0 = 1.0701e-04
Loss = 6.1162e-04, PNorm = 174.1054, GNorm = 0.0319, lr_0 = 1.0694e-04
Loss = 5.0908e-04, PNorm = 174.1070, GNorm = 0.0750, lr_0 = 1.0687e-04
Loss = 4.8802e-04, PNorm = 174.1082, GNorm = 0.0450, lr_0 = 1.0679e-04
Loss = 1.2426e-03, PNorm = 174.1092, GNorm = 0.0540, lr_0 = 1.0672e-04
Loss = 1.4951e-03, PNorm = 174.1106, GNorm = 0.2492, lr_0 = 1.0665e-04
Loss = 1.7540e-03, PNorm = 174.1109, GNorm = 0.2744, lr_0 = 1.0657e-04
Loss = 5.6639e-04, PNorm = 174.1123, GNorm = 0.1574, lr_0 = 1.0650e-04
Loss = 5.4133e-04, PNorm = 174.1133, GNorm = 0.0399, lr_0 = 1.0643e-04
Loss = 2.5903e-03, PNorm = 174.1148, GNorm = 0.2102, lr_0 = 1.0635e-04
Loss = 5.8975e-04, PNorm = 174.1160, GNorm = 0.1670, lr_0 = 1.0628e-04
Loss = 8.7331e-04, PNorm = 174.1174, GNorm = 0.1228, lr_0 = 1.0621e-04
Loss = 1.6777e-03, PNorm = 174.1198, GNorm = 0.2003, lr_0 = 1.0614e-04
Loss = 8.7965e-04, PNorm = 174.1202, GNorm = 0.1013, lr_0 = 1.0606e-04
Loss = 1.3363e-03, PNorm = 174.1208, GNorm = 0.0838, lr_0 = 1.0599e-04
Loss = 8.2541e-04, PNorm = 174.1225, GNorm = 0.0562, lr_0 = 1.0592e-04
Loss = 1.4132e-03, PNorm = 174.1243, GNorm = 0.1891, lr_0 = 1.0585e-04
Loss = 2.9704e-03, PNorm = 174.1265, GNorm = 0.0372, lr_0 = 1.0577e-04
Loss = 6.0668e-04, PNorm = 174.1291, GNorm = 0.0315, lr_0 = 1.0570e-04
Loss = 1.1726e-03, PNorm = 174.1304, GNorm = 0.0932, lr_0 = 1.0563e-04
Loss = 8.1696e-04, PNorm = 174.1311, GNorm = 0.0584, lr_0 = 1.0556e-04
Loss = 6.2738e-04, PNorm = 174.1314, GNorm = 0.0771, lr_0 = 1.0548e-04
Loss = 4.2456e-04, PNorm = 174.1325, GNorm = 0.0317, lr_0 = 1.0541e-04
Loss = 4.8339e-04, PNorm = 174.1342, GNorm = 0.1132, lr_0 = 1.0534e-04
Loss = 9.8783e-04, PNorm = 174.1356, GNorm = 0.0554, lr_0 = 1.0527e-04
Loss = 1.7687e-03, PNorm = 174.1362, GNorm = 0.1939, lr_0 = 1.0519e-04
Loss = 5.2633e-04, PNorm = 174.1381, GNorm = 0.0474, lr_0 = 1.0512e-04
Loss = 1.6615e-03, PNorm = 174.1394, GNorm = 0.3494, lr_0 = 1.0505e-04
Loss = 2.7247e-03, PNorm = 174.1408, GNorm = 0.1078, lr_0 = 1.0498e-04
Loss = 4.1488e-03, PNorm = 174.1400, GNorm = 0.1636, lr_0 = 1.0491e-04
Loss = 7.7826e-04, PNorm = 174.1404, GNorm = 0.1281, lr_0 = 1.0483e-04
Loss = 4.9458e-04, PNorm = 174.1411, GNorm = 0.1422, lr_0 = 1.0476e-04
Loss = 2.1691e-03, PNorm = 174.1423, GNorm = 0.7053, lr_0 = 1.0469e-04
Loss = 8.6027e-04, PNorm = 174.1442, GNorm = 0.0932, lr_0 = 1.0462e-04
Loss = 8.2334e-04, PNorm = 174.1456, GNorm = 0.0275, lr_0 = 1.0455e-04
Loss = 1.4960e-03, PNorm = 174.1470, GNorm = 0.1580, lr_0 = 1.0448e-04
Loss = 1.2160e-03, PNorm = 174.1473, GNorm = 0.1048, lr_0 = 1.0440e-04
Loss = 2.3588e-03, PNorm = 174.1481, GNorm = 0.1016, lr_0 = 1.0433e-04
Loss = 1.7746e-03, PNorm = 174.1486, GNorm = 0.2908, lr_0 = 1.0426e-04
Loss = 2.9334e-03, PNorm = 174.1499, GNorm = 0.1032, lr_0 = 1.0419e-04
Loss = 1.2124e-03, PNorm = 174.1514, GNorm = 0.1677, lr_0 = 1.0412e-04
Loss = 2.6707e-03, PNorm = 174.1538, GNorm = 0.1111, lr_0 = 1.0405e-04
Loss = 1.4587e-03, PNorm = 174.1551, GNorm = 0.0641, lr_0 = 1.0398e-04
Loss = 7.6041e-04, PNorm = 174.1569, GNorm = 0.0314, lr_0 = 1.0391e-04
Loss = 1.3663e-03, PNorm = 174.1574, GNorm = 0.1477, lr_0 = 1.0383e-04
Loss = 7.8779e-04, PNorm = 174.1579, GNorm = 0.0845, lr_0 = 1.0376e-04
Loss = 9.7915e-04, PNorm = 174.1587, GNorm = 0.0358, lr_0 = 1.0369e-04
Loss = 1.3525e-03, PNorm = 174.1587, GNorm = 0.0366, lr_0 = 1.0362e-04
Loss = 5.8107e-04, PNorm = 174.1593, GNorm = 0.1498, lr_0 = 1.0355e-04
Loss = 6.0988e-04, PNorm = 174.1596, GNorm = 0.0674, lr_0 = 1.0348e-04
Loss = 8.9111e-04, PNorm = 174.1607, GNorm = 0.0309, lr_0 = 1.0341e-04
Loss = 1.0331e-03, PNorm = 174.1621, GNorm = 0.0575, lr_0 = 1.0334e-04
Loss = 4.7647e-04, PNorm = 174.1626, GNorm = 0.0411, lr_0 = 1.0327e-04
Loss = 1.4372e-03, PNorm = 174.1638, GNorm = 0.0648, lr_0 = 1.0320e-04
Loss = 8.6423e-04, PNorm = 174.1653, GNorm = 0.0273, lr_0 = 1.0312e-04
Loss = 7.5036e-04, PNorm = 174.1656, GNorm = 0.0660, lr_0 = 1.0305e-04
Loss = 5.3281e-04, PNorm = 174.1678, GNorm = 0.1283, lr_0 = 1.0298e-04
Loss = 1.1147e-03, PNorm = 174.1692, GNorm = 0.2036, lr_0 = 1.0291e-04
Loss = 7.8078e-04, PNorm = 174.1706, GNorm = 0.0782, lr_0 = 1.0284e-04
Loss = 1.8505e-03, PNorm = 174.1706, GNorm = 0.1970, lr_0 = 1.0277e-04
Loss = 1.5569e-03, PNorm = 174.1711, GNorm = 0.1710, lr_0 = 1.0270e-04
Loss = 5.4138e-04, PNorm = 174.1724, GNorm = 0.0680, lr_0 = 1.0263e-04
Loss = 1.1602e-03, PNorm = 174.1742, GNorm = 0.0688, lr_0 = 1.0256e-04
Loss = 1.5711e-03, PNorm = 174.1748, GNorm = 0.1335, lr_0 = 1.0249e-04
Loss = 2.5983e-03, PNorm = 174.1757, GNorm = 0.0477, lr_0 = 1.0242e-04
Loss = 1.6392e-03, PNorm = 174.1768, GNorm = 0.0472, lr_0 = 1.0235e-04
Loss = 1.1274e-03, PNorm = 174.1774, GNorm = 0.1658, lr_0 = 1.0228e-04
Loss = 6.9748e-04, PNorm = 174.1793, GNorm = 0.1033, lr_0 = 1.0221e-04
Loss = 1.1955e-03, PNorm = 174.1807, GNorm = 0.0739, lr_0 = 1.0214e-04
Loss = 9.3479e-04, PNorm = 174.1818, GNorm = 0.1370, lr_0 = 1.0207e-04
Loss = 9.1115e-04, PNorm = 174.1823, GNorm = 0.0957, lr_0 = 1.0200e-04
Loss = 1.2615e-03, PNorm = 174.1837, GNorm = 0.0283, lr_0 = 1.0193e-04
Loss = 5.3392e-04, PNorm = 174.1840, GNorm = 0.0546, lr_0 = 1.0186e-04
Loss = 1.6071e-03, PNorm = 174.1835, GNorm = 0.0919, lr_0 = 1.0179e-04
Loss = 6.0817e-04, PNorm = 174.1847, GNorm = 0.1006, lr_0 = 1.0172e-04
Loss = 1.1996e-03, PNorm = 174.1872, GNorm = 0.0972, lr_0 = 1.0165e-04
Loss = 6.1688e-04, PNorm = 174.1892, GNorm = 0.0681, lr_0 = 1.0158e-04
Loss = 9.8069e-04, PNorm = 174.1889, GNorm = 0.0553, lr_0 = 1.0151e-04
Loss = 8.3452e-04, PNorm = 174.1901, GNorm = 0.0382, lr_0 = 1.0144e-04
Loss = 1.5978e-03, PNorm = 174.1906, GNorm = 0.0393, lr_0 = 1.0137e-04
Loss = 5.6594e-04, PNorm = 174.1916, GNorm = 0.0557, lr_0 = 1.0130e-04
Loss = 1.5075e-03, PNorm = 174.1917, GNorm = 0.2314, lr_0 = 1.0123e-04
Loss = 9.0650e-04, PNorm = 174.1931, GNorm = 0.1581, lr_0 = 1.0116e-04
Loss = 1.0763e-03, PNorm = 174.1936, GNorm = 0.0359, lr_0 = 1.0110e-04
Loss = 8.9969e-04, PNorm = 174.1948, GNorm = 0.1117, lr_0 = 1.0103e-04
Loss = 1.0563e-03, PNorm = 174.1970, GNorm = 0.0801, lr_0 = 1.0096e-04
Loss = 6.8710e-04, PNorm = 174.1980, GNorm = 0.1279, lr_0 = 1.0089e-04
Loss = 6.4191e-04, PNorm = 174.1987, GNorm = 0.0449, lr_0 = 1.0082e-04
Loss = 8.1049e-04, PNorm = 174.1997, GNorm = 0.0449, lr_0 = 1.0075e-04
Loss = 7.3634e-04, PNorm = 174.2006, GNorm = 0.0568, lr_0 = 1.0068e-04
Loss = 1.0123e-03, PNorm = 174.2018, GNorm = 0.0446, lr_0 = 1.0061e-04
Loss = 9.2780e-04, PNorm = 174.2040, GNorm = 0.0703, lr_0 = 1.0054e-04
Loss = 9.0924e-04, PNorm = 174.2063, GNorm = 0.0309, lr_0 = 1.0047e-04
Loss = 1.0688e-03, PNorm = 174.2088, GNorm = 0.0616, lr_0 = 1.0041e-04
Loss = 2.2860e-03, PNorm = 174.2097, GNorm = 0.0515, lr_0 = 1.0034e-04
Loss = 7.3932e-04, PNorm = 174.2104, GNorm = 0.0598, lr_0 = 1.0027e-04
Loss = 2.2831e-03, PNorm = 174.2116, GNorm = 0.0498, lr_0 = 1.0020e-04
Loss = 2.2231e-03, PNorm = 174.2139, GNorm = 0.1253, lr_0 = 1.0013e-04
Loss = 7.4280e-04, PNorm = 174.2165, GNorm = 0.0398, lr_0 = 1.0006e-04
Loss = 1.9553e-03, PNorm = 174.2183, GNorm = 0.1235, lr_0 = 1.0000e-04
Validation mae = 0.278283
Model 0 best validation mae = 0.277975 on epoch 26
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.273797
Ensemble test mae = 0.273797
Fold 6
Splitting data with seed 6
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=2200, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=2200, out_features=2200, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=2200, out_features=1, bias=True)
  )
)
Number of parameters = 9,352,201
Moving model to cuda
Epoch 0
Loss = 8.5488e-01, PNorm = 65.7652, GNorm = 2.3471, lr_0 = 1.0413e-04
Loss = 6.8070e-01, PNorm = 65.7774, GNorm = 1.3033, lr_0 = 1.0788e-04
Loss = 5.6587e-01, PNorm = 65.7902, GNorm = 2.7887, lr_0 = 1.1163e-04
Loss = 4.5701e-01, PNorm = 65.8035, GNorm = 1.8932, lr_0 = 1.1537e-04
Loss = 4.6983e-01, PNorm = 65.8134, GNorm = 2.0937, lr_0 = 1.1913e-04
Loss = 4.3490e-01, PNorm = 65.8225, GNorm = 2.0569, lr_0 = 1.2287e-04
Loss = 4.6910e-01, PNorm = 65.8315, GNorm = 2.6544, lr_0 = 1.2663e-04
Loss = 4.5656e-01, PNorm = 65.8402, GNorm = 2.2394, lr_0 = 1.3038e-04
Loss = 4.6018e-01, PNorm = 65.8496, GNorm = 1.8802, lr_0 = 1.3413e-04
Loss = 4.1030e-01, PNorm = 65.8602, GNorm = 2.6414, lr_0 = 1.3788e-04
Loss = 3.7665e-01, PNorm = 65.8706, GNorm = 1.9975, lr_0 = 1.4163e-04
Loss = 3.7451e-01, PNorm = 65.8811, GNorm = 2.1359, lr_0 = 1.4537e-04
Loss = 3.5782e-01, PNorm = 65.8899, GNorm = 3.1531, lr_0 = 1.4913e-04
Loss = 4.0890e-01, PNorm = 65.9003, GNorm = 2.9541, lr_0 = 1.5288e-04
Loss = 3.7265e-01, PNorm = 65.9123, GNorm = 3.0288, lr_0 = 1.5662e-04
Loss = 3.4760e-01, PNorm = 65.9228, GNorm = 1.8753, lr_0 = 1.6038e-04
Loss = 3.2553e-01, PNorm = 65.9341, GNorm = 1.7064, lr_0 = 1.6412e-04
Loss = 3.4787e-01, PNorm = 65.9448, GNorm = 2.6334, lr_0 = 1.6788e-04
Loss = 3.5661e-01, PNorm = 65.9574, GNorm = 1.7917, lr_0 = 1.7163e-04
Loss = 3.5596e-01, PNorm = 65.9702, GNorm = 2.5061, lr_0 = 1.7538e-04
Loss = 3.4164e-01, PNorm = 65.9834, GNorm = 1.7718, lr_0 = 1.7913e-04
Loss = 3.3296e-01, PNorm = 65.9960, GNorm = 2.0422, lr_0 = 1.8288e-04
Loss = 3.6314e-01, PNorm = 66.0114, GNorm = 1.9815, lr_0 = 1.8662e-04
Loss = 3.3233e-01, PNorm = 66.0251, GNorm = 1.7357, lr_0 = 1.9038e-04
Loss = 3.8672e-01, PNorm = 66.0381, GNorm = 1.3625, lr_0 = 1.9413e-04
Loss = 3.0333e-01, PNorm = 66.0550, GNorm = 1.4380, lr_0 = 1.9788e-04
Loss = 3.1290e-01, PNorm = 66.0699, GNorm = 1.6336, lr_0 = 2.0163e-04
Loss = 3.2512e-01, PNorm = 66.0839, GNorm = 1.8901, lr_0 = 2.0537e-04
Loss = 3.1443e-01, PNorm = 66.1003, GNorm = 1.8781, lr_0 = 2.0913e-04
Loss = 2.9655e-01, PNorm = 66.1154, GNorm = 1.7876, lr_0 = 2.1288e-04
Loss = 3.1878e-01, PNorm = 66.1330, GNorm = 2.9056, lr_0 = 2.1663e-04
Loss = 3.1793e-01, PNorm = 66.1495, GNorm = 1.5722, lr_0 = 2.2038e-04
Loss = 3.3618e-01, PNorm = 66.1669, GNorm = 2.0173, lr_0 = 2.2412e-04
Loss = 2.9183e-01, PNorm = 66.1850, GNorm = 1.5102, lr_0 = 2.2787e-04
Loss = 3.3973e-01, PNorm = 66.2047, GNorm = 1.6054, lr_0 = 2.3163e-04
Loss = 3.3885e-01, PNorm = 66.2251, GNorm = 2.6672, lr_0 = 2.3538e-04
Loss = 2.9085e-01, PNorm = 66.2445, GNorm = 1.3396, lr_0 = 2.3913e-04
Loss = 3.2622e-01, PNorm = 66.2680, GNorm = 1.3243, lr_0 = 2.4288e-04
Loss = 2.8877e-01, PNorm = 66.2880, GNorm = 1.5635, lr_0 = 2.4662e-04
Loss = 3.3212e-01, PNorm = 66.3055, GNorm = 1.7354, lr_0 = 2.5038e-04
Loss = 2.8887e-01, PNorm = 66.3264, GNorm = 1.3737, lr_0 = 2.5413e-04
Loss = 2.9742e-01, PNorm = 66.3473, GNorm = 1.6138, lr_0 = 2.5788e-04
Loss = 2.6978e-01, PNorm = 66.3664, GNorm = 1.6590, lr_0 = 2.6163e-04
Loss = 2.7596e-01, PNorm = 66.3861, GNorm = 1.7777, lr_0 = 2.6537e-04
Loss = 2.7526e-01, PNorm = 66.4068, GNorm = 1.6523, lr_0 = 2.6912e-04
Loss = 3.1306e-01, PNorm = 66.4252, GNorm = 1.6363, lr_0 = 2.7288e-04
Loss = 2.8968e-01, PNorm = 66.4523, GNorm = 1.0148, lr_0 = 2.7663e-04
Loss = 2.8848e-01, PNorm = 66.4792, GNorm = 1.7227, lr_0 = 2.8038e-04
Loss = 3.0120e-01, PNorm = 66.5046, GNorm = 2.1128, lr_0 = 2.8413e-04
Loss = 2.8971e-01, PNorm = 66.5297, GNorm = 1.8506, lr_0 = 2.8787e-04
Loss = 2.7954e-01, PNorm = 66.5555, GNorm = 0.9722, lr_0 = 2.9163e-04
Loss = 3.1504e-01, PNorm = 66.5844, GNorm = 1.2850, lr_0 = 2.9538e-04
Loss = 3.0678e-01, PNorm = 66.6057, GNorm = 1.2523, lr_0 = 2.9913e-04
Loss = 2.7982e-01, PNorm = 66.6404, GNorm = 1.1957, lr_0 = 3.0288e-04
Loss = 2.6329e-01, PNorm = 66.6659, GNorm = 1.0226, lr_0 = 3.0662e-04
Loss = 3.1577e-01, PNorm = 66.6908, GNorm = 1.3251, lr_0 = 3.1037e-04
Loss = 2.9484e-01, PNorm = 66.7231, GNorm = 1.4773, lr_0 = 3.1413e-04
Loss = 3.2056e-01, PNorm = 66.7508, GNorm = 0.9876, lr_0 = 3.1788e-04
Loss = 3.5193e-01, PNorm = 66.7853, GNorm = 1.2983, lr_0 = 3.2163e-04
Loss = 3.0475e-01, PNorm = 66.8224, GNorm = 1.2522, lr_0 = 3.2538e-04
Loss = 2.7345e-01, PNorm = 66.8577, GNorm = 1.5308, lr_0 = 3.2912e-04
Loss = 3.2258e-01, PNorm = 66.8895, GNorm = 1.2127, lr_0 = 3.3288e-04
Loss = 2.8254e-01, PNorm = 66.9241, GNorm = 1.2449, lr_0 = 3.3663e-04
Loss = 3.0267e-01, PNorm = 66.9521, GNorm = 1.2786, lr_0 = 3.4038e-04
Loss = 2.7198e-01, PNorm = 66.9872, GNorm = 1.4902, lr_0 = 3.4413e-04
Loss = 2.7825e-01, PNorm = 67.0205, GNorm = 1.0336, lr_0 = 3.4787e-04
Loss = 2.8650e-01, PNorm = 67.0526, GNorm = 1.0792, lr_0 = 3.5162e-04
Loss = 2.6745e-01, PNorm = 67.0897, GNorm = 1.3360, lr_0 = 3.5538e-04
Loss = 3.1296e-01, PNorm = 67.1275, GNorm = 1.6905, lr_0 = 3.5913e-04
Loss = 3.2533e-01, PNorm = 67.1655, GNorm = 1.2421, lr_0 = 3.6288e-04
Loss = 2.7514e-01, PNorm = 67.2062, GNorm = 1.5120, lr_0 = 3.6662e-04
Loss = 3.0383e-01, PNorm = 67.2385, GNorm = 1.4886, lr_0 = 3.7037e-04
Loss = 2.6624e-01, PNorm = 67.2775, GNorm = 1.1861, lr_0 = 3.7413e-04
Loss = 2.7156e-01, PNorm = 67.3110, GNorm = 1.1993, lr_0 = 3.7788e-04
Loss = 2.5049e-01, PNorm = 67.3453, GNorm = 1.1228, lr_0 = 3.8163e-04
Loss = 2.7200e-01, PNorm = 67.3810, GNorm = 1.1759, lr_0 = 3.8537e-04
Loss = 2.5812e-01, PNorm = 67.4133, GNorm = 1.0831, lr_0 = 3.8912e-04
Loss = 2.6022e-01, PNorm = 67.4517, GNorm = 1.1842, lr_0 = 3.9287e-04
Loss = 2.5050e-01, PNorm = 67.4863, GNorm = 1.3779, lr_0 = 3.9663e-04
Loss = 2.2584e-01, PNorm = 67.5170, GNorm = 1.1365, lr_0 = 4.0038e-04
Loss = 2.7873e-01, PNorm = 67.5584, GNorm = 1.1547, lr_0 = 4.0413e-04
Loss = 2.5175e-01, PNorm = 67.5938, GNorm = 0.9828, lr_0 = 4.0787e-04
Loss = 2.6619e-01, PNorm = 67.6346, GNorm = 1.2005, lr_0 = 4.1162e-04
Loss = 2.5388e-01, PNorm = 67.6729, GNorm = 1.3225, lr_0 = 4.1537e-04
Loss = 2.8966e-01, PNorm = 67.7157, GNorm = 2.1214, lr_0 = 4.1913e-04
Loss = 3.0785e-01, PNorm = 67.7595, GNorm = 1.3542, lr_0 = 4.2288e-04
Loss = 2.4452e-01, PNorm = 67.8094, GNorm = 0.9617, lr_0 = 4.2662e-04
Loss = 2.9661e-01, PNorm = 67.8561, GNorm = 1.0558, lr_0 = 4.3037e-04
Loss = 2.7798e-01, PNorm = 67.8993, GNorm = 0.9800, lr_0 = 4.3412e-04
Loss = 2.9479e-01, PNorm = 67.9585, GNorm = 1.1276, lr_0 = 4.3788e-04
Loss = 2.5564e-01, PNorm = 68.0012, GNorm = 0.8527, lr_0 = 4.4163e-04
Loss = 2.8815e-01, PNorm = 68.0463, GNorm = 0.9633, lr_0 = 4.4538e-04
Loss = 2.6542e-01, PNorm = 68.0884, GNorm = 1.1507, lr_0 = 4.4912e-04
Loss = 2.5753e-01, PNorm = 68.1298, GNorm = 0.9348, lr_0 = 4.5287e-04
Loss = 2.8842e-01, PNorm = 68.1788, GNorm = 1.3451, lr_0 = 4.5662e-04
Loss = 2.6092e-01, PNorm = 68.2289, GNorm = 0.9527, lr_0 = 4.6038e-04
Loss = 2.5173e-01, PNorm = 68.2836, GNorm = 1.5354, lr_0 = 4.6413e-04
Loss = 2.7383e-01, PNorm = 68.3286, GNorm = 1.2867, lr_0 = 4.6787e-04
Loss = 2.6696e-01, PNorm = 68.3830, GNorm = 1.1768, lr_0 = 4.7162e-04
Loss = 2.6359e-01, PNorm = 68.4331, GNorm = 0.9651, lr_0 = 4.7537e-04
Loss = 2.3815e-01, PNorm = 68.4845, GNorm = 1.0233, lr_0 = 4.7913e-04
Loss = 2.4646e-01, PNorm = 68.5322, GNorm = 0.8363, lr_0 = 4.8288e-04
Loss = 2.3683e-01, PNorm = 68.5790, GNorm = 0.8190, lr_0 = 4.8663e-04
Loss = 2.5590e-01, PNorm = 68.6317, GNorm = 0.9014, lr_0 = 4.9038e-04
Loss = 2.4726e-01, PNorm = 68.6818, GNorm = 0.9679, lr_0 = 4.9412e-04
Loss = 2.4291e-01, PNorm = 68.7323, GNorm = 1.0735, lr_0 = 4.9788e-04
Loss = 2.7411e-01, PNorm = 68.7881, GNorm = 1.1320, lr_0 = 5.0163e-04
Loss = 2.8252e-01, PNorm = 68.8387, GNorm = 1.5572, lr_0 = 5.0538e-04
Loss = 2.8790e-01, PNorm = 68.9041, GNorm = 0.8846, lr_0 = 5.0913e-04
Loss = 2.3799e-01, PNorm = 68.9635, GNorm = 1.2281, lr_0 = 5.1287e-04
Loss = 2.8695e-01, PNorm = 69.0196, GNorm = 0.9741, lr_0 = 5.1663e-04
Loss = 2.6744e-01, PNorm = 69.0805, GNorm = 1.1487, lr_0 = 5.2038e-04
Loss = 2.3760e-01, PNorm = 69.1330, GNorm = 0.7919, lr_0 = 5.2413e-04
Loss = 2.5151e-01, PNorm = 69.1915, GNorm = 0.8998, lr_0 = 5.2788e-04
Loss = 2.3638e-01, PNorm = 69.2504, GNorm = 1.1477, lr_0 = 5.3162e-04
Loss = 2.4903e-01, PNorm = 69.3083, GNorm = 1.0701, lr_0 = 5.3538e-04
Loss = 2.8321e-01, PNorm = 69.3705, GNorm = 1.0401, lr_0 = 5.3912e-04
Loss = 2.4071e-01, PNorm = 69.4387, GNorm = 1.5508, lr_0 = 5.4288e-04
Loss = 2.4474e-01, PNorm = 69.5001, GNorm = 0.8516, lr_0 = 5.4663e-04
Loss = 2.4460e-01, PNorm = 69.5632, GNorm = 1.0937, lr_0 = 5.5038e-04
Validation mae = 0.317424
Epoch 1
Loss = 1.9671e-01, PNorm = 69.6327, GNorm = 0.6035, lr_0 = 5.5413e-04
Loss = 1.6796e-01, PNorm = 69.6923, GNorm = 1.0072, lr_0 = 5.5787e-04
Loss = 1.6510e-01, PNorm = 69.7526, GNorm = 0.9289, lr_0 = 5.6163e-04
Loss = 1.8749e-01, PNorm = 69.8083, GNorm = 0.8318, lr_0 = 5.6538e-04
Loss = 1.9321e-01, PNorm = 69.8808, GNorm = 0.9430, lr_0 = 5.6913e-04
Loss = 1.7049e-01, PNorm = 69.9428, GNorm = 0.8728, lr_0 = 5.7288e-04
Loss = 1.8277e-01, PNorm = 70.0047, GNorm = 0.6739, lr_0 = 5.7662e-04
Loss = 1.8992e-01, PNorm = 70.0738, GNorm = 0.8855, lr_0 = 5.8038e-04
Loss = 1.7539e-01, PNorm = 70.1395, GNorm = 0.9916, lr_0 = 5.8413e-04
Loss = 1.7851e-01, PNorm = 70.2096, GNorm = 0.6385, lr_0 = 5.8788e-04
Loss = 1.8598e-01, PNorm = 70.2801, GNorm = 0.9268, lr_0 = 5.9163e-04
Loss = 1.5014e-01, PNorm = 70.3441, GNorm = 0.7103, lr_0 = 5.9538e-04
Loss = 1.9172e-01, PNorm = 70.4131, GNorm = 0.8415, lr_0 = 5.9913e-04
Loss = 1.8833e-01, PNorm = 70.4930, GNorm = 0.9990, lr_0 = 6.0288e-04
Loss = 1.9766e-01, PNorm = 70.5647, GNorm = 0.8629, lr_0 = 6.0663e-04
Loss = 1.7957e-01, PNorm = 70.6413, GNorm = 0.8174, lr_0 = 6.1038e-04
Loss = 1.6102e-01, PNorm = 70.7250, GNorm = 0.7957, lr_0 = 6.1413e-04
Loss = 1.8576e-01, PNorm = 70.7986, GNorm = 1.2624, lr_0 = 6.1788e-04
Loss = 1.8389e-01, PNorm = 70.8752, GNorm = 0.8505, lr_0 = 6.2163e-04
Loss = 1.7764e-01, PNorm = 70.9594, GNorm = 0.8423, lr_0 = 6.2538e-04
Loss = 1.6105e-01, PNorm = 71.0381, GNorm = 0.7587, lr_0 = 6.2913e-04
Loss = 1.9774e-01, PNorm = 71.1253, GNorm = 0.8176, lr_0 = 6.3288e-04
Loss = 2.2774e-01, PNorm = 71.2072, GNorm = 1.0985, lr_0 = 6.3663e-04
Loss = 1.8506e-01, PNorm = 71.3104, GNorm = 0.9576, lr_0 = 6.4038e-04
Loss = 1.8831e-01, PNorm = 71.3874, GNorm = 0.9031, lr_0 = 6.4413e-04
Loss = 1.8884e-01, PNorm = 71.4870, GNorm = 0.7515, lr_0 = 6.4788e-04
Loss = 1.7923e-01, PNorm = 71.5631, GNorm = 0.7263, lr_0 = 6.5163e-04
Loss = 1.5695e-01, PNorm = 71.6460, GNorm = 0.9853, lr_0 = 6.5538e-04
Loss = 1.8838e-01, PNorm = 71.7230, GNorm = 0.9031, lr_0 = 6.5913e-04
Loss = 1.7489e-01, PNorm = 71.8136, GNorm = 1.0654, lr_0 = 6.6288e-04
Loss = 1.8705e-01, PNorm = 71.9127, GNorm = 0.8646, lr_0 = 6.6663e-04
Loss = 1.9538e-01, PNorm = 72.0040, GNorm = 0.9557, lr_0 = 6.7038e-04
Loss = 2.0324e-01, PNorm = 72.1150, GNorm = 0.7686, lr_0 = 6.7413e-04
Loss = 1.6616e-01, PNorm = 72.2079, GNorm = 0.9398, lr_0 = 6.7788e-04
Loss = 1.9690e-01, PNorm = 72.2999, GNorm = 0.8572, lr_0 = 6.8163e-04
Loss = 1.7593e-01, PNorm = 72.4038, GNorm = 0.9325, lr_0 = 6.8538e-04
Loss = 2.0448e-01, PNorm = 72.5054, GNorm = 0.7831, lr_0 = 6.8913e-04
Loss = 2.1579e-01, PNorm = 72.6179, GNorm = 0.9523, lr_0 = 6.9288e-04
Loss = 2.0146e-01, PNorm = 72.7252, GNorm = 0.8960, lr_0 = 6.9663e-04
Loss = 1.9552e-01, PNorm = 72.8263, GNorm = 0.9887, lr_0 = 7.0038e-04
Loss = 1.9841e-01, PNorm = 72.9289, GNorm = 1.0011, lr_0 = 7.0413e-04
Loss = 1.7543e-01, PNorm = 73.0287, GNorm = 0.7420, lr_0 = 7.0788e-04
Loss = 2.0770e-01, PNorm = 73.1293, GNorm = 0.9014, lr_0 = 7.1163e-04
Loss = 1.9160e-01, PNorm = 73.2347, GNorm = 0.8086, lr_0 = 7.1538e-04
Loss = 1.6970e-01, PNorm = 73.3426, GNorm = 0.8765, lr_0 = 7.1913e-04
Loss = 1.9035e-01, PNorm = 73.4567, GNorm = 0.9007, lr_0 = 7.2288e-04
Loss = 2.1499e-01, PNorm = 73.5604, GNorm = 0.7834, lr_0 = 7.2663e-04
Loss = 2.2471e-01, PNorm = 73.6825, GNorm = 0.8389, lr_0 = 7.3038e-04
Loss = 2.0166e-01, PNorm = 73.7955, GNorm = 1.1150, lr_0 = 7.3413e-04
Loss = 1.9292e-01, PNorm = 73.9125, GNorm = 0.7727, lr_0 = 7.3788e-04
Loss = 1.8126e-01, PNorm = 74.0158, GNorm = 0.8149, lr_0 = 7.4163e-04
Loss = 1.9447e-01, PNorm = 74.1172, GNorm = 0.6238, lr_0 = 7.4538e-04
Loss = 1.9454e-01, PNorm = 74.2130, GNorm = 0.6747, lr_0 = 7.4913e-04
Loss = 1.8835e-01, PNorm = 74.3118, GNorm = 0.8093, lr_0 = 7.5288e-04
Loss = 1.7944e-01, PNorm = 74.4258, GNorm = 1.1380, lr_0 = 7.5663e-04
Loss = 2.1120e-01, PNorm = 74.5199, GNorm = 1.1106, lr_0 = 7.6038e-04
Loss = 2.0348e-01, PNorm = 74.6346, GNorm = 0.9264, lr_0 = 7.6413e-04
Loss = 1.7921e-01, PNorm = 74.7492, GNorm = 0.9611, lr_0 = 7.6788e-04
Loss = 1.8818e-01, PNorm = 74.8629, GNorm = 0.7695, lr_0 = 7.7163e-04
Loss = 1.8388e-01, PNorm = 74.9735, GNorm = 0.9321, lr_0 = 7.7538e-04
Loss = 1.9891e-01, PNorm = 75.0815, GNorm = 0.8129, lr_0 = 7.7913e-04
Loss = 2.0963e-01, PNorm = 75.2003, GNorm = 1.4424, lr_0 = 7.8288e-04
Loss = 2.0115e-01, PNorm = 75.3114, GNorm = 0.8795, lr_0 = 7.8663e-04
Loss = 2.1055e-01, PNorm = 75.4344, GNorm = 1.1198, lr_0 = 7.9038e-04
Loss = 2.1742e-01, PNorm = 75.5484, GNorm = 0.5947, lr_0 = 7.9413e-04
Loss = 2.1339e-01, PNorm = 75.6859, GNorm = 0.7660, lr_0 = 7.9788e-04
Loss = 2.0577e-01, PNorm = 75.8020, GNorm = 1.2012, lr_0 = 8.0163e-04
Loss = 2.1400e-01, PNorm = 75.9091, GNorm = 0.8750, lr_0 = 8.0538e-04
Loss = 1.9137e-01, PNorm = 76.0192, GNorm = 1.1584, lr_0 = 8.0913e-04
Loss = 2.1947e-01, PNorm = 76.1257, GNorm = 0.9156, lr_0 = 8.1288e-04
Loss = 2.1363e-01, PNorm = 76.2431, GNorm = 0.9052, lr_0 = 8.1663e-04
Loss = 2.0375e-01, PNorm = 76.3577, GNorm = 0.9090, lr_0 = 8.2038e-04
Loss = 2.0063e-01, PNorm = 76.4837, GNorm = 0.8754, lr_0 = 8.2413e-04
Loss = 2.0727e-01, PNorm = 76.5878, GNorm = 0.8403, lr_0 = 8.2788e-04
Loss = 1.9638e-01, PNorm = 76.7027, GNorm = 0.8339, lr_0 = 8.3163e-04
Loss = 2.0463e-01, PNorm = 76.8110, GNorm = 0.5826, lr_0 = 8.3538e-04
Loss = 1.9894e-01, PNorm = 76.9241, GNorm = 1.1675, lr_0 = 8.3913e-04
Loss = 2.0898e-01, PNorm = 77.0383, GNorm = 0.8278, lr_0 = 8.4288e-04
Loss = 1.8521e-01, PNorm = 77.1517, GNorm = 1.1934, lr_0 = 8.4663e-04
Loss = 1.9350e-01, PNorm = 77.2681, GNorm = 0.7839, lr_0 = 8.5038e-04
Loss = 2.0054e-01, PNorm = 77.3817, GNorm = 1.0337, lr_0 = 8.5413e-04
Loss = 1.8151e-01, PNorm = 77.5077, GNorm = 0.5429, lr_0 = 8.5788e-04
Loss = 2.0500e-01, PNorm = 77.6263, GNorm = 1.0126, lr_0 = 8.6163e-04
Loss = 2.0788e-01, PNorm = 77.7429, GNorm = 0.6793, lr_0 = 8.6538e-04
Loss = 1.7597e-01, PNorm = 77.8657, GNorm = 0.7252, lr_0 = 8.6913e-04
Loss = 2.0975e-01, PNorm = 77.9757, GNorm = 0.8725, lr_0 = 8.7288e-04
Loss = 2.0759e-01, PNorm = 78.1094, GNorm = 0.9682, lr_0 = 8.7663e-04
Loss = 2.1136e-01, PNorm = 78.2344, GNorm = 1.2278, lr_0 = 8.8038e-04
Loss = 1.9564e-01, PNorm = 78.3761, GNorm = 0.6820, lr_0 = 8.8413e-04
Loss = 2.5517e-01, PNorm = 78.5222, GNorm = 0.6518, lr_0 = 8.8788e-04
Loss = 1.7265e-01, PNorm = 78.6749, GNorm = 1.0371, lr_0 = 8.9163e-04
Loss = 2.1153e-01, PNorm = 78.8042, GNorm = 0.6056, lr_0 = 8.9538e-04
Loss = 2.1191e-01, PNorm = 78.9584, GNorm = 1.0967, lr_0 = 8.9913e-04
Loss = 2.2746e-01, PNorm = 79.1150, GNorm = 0.7692, lr_0 = 9.0288e-04
Loss = 2.1834e-01, PNorm = 79.2646, GNorm = 0.9213, lr_0 = 9.0663e-04
Loss = 2.0430e-01, PNorm = 79.4271, GNorm = 0.7901, lr_0 = 9.1038e-04
Loss = 2.1535e-01, PNorm = 79.5898, GNorm = 0.5901, lr_0 = 9.1413e-04
Loss = 2.3280e-01, PNorm = 79.7460, GNorm = 0.8704, lr_0 = 9.1788e-04
Loss = 2.1259e-01, PNorm = 79.9145, GNorm = 0.8575, lr_0 = 9.2163e-04
Loss = 2.0343e-01, PNorm = 80.0606, GNorm = 0.9869, lr_0 = 9.2538e-04
Loss = 2.1704e-01, PNorm = 80.2215, GNorm = 0.6771, lr_0 = 9.2913e-04
Loss = 2.2336e-01, PNorm = 80.3642, GNorm = 0.8368, lr_0 = 9.3288e-04
Loss = 2.0573e-01, PNorm = 80.5174, GNorm = 0.9821, lr_0 = 9.3663e-04
Loss = 2.1580e-01, PNorm = 80.6647, GNorm = 0.5782, lr_0 = 9.4038e-04
Loss = 2.3651e-01, PNorm = 80.8184, GNorm = 1.6143, lr_0 = 9.4413e-04
Loss = 2.1230e-01, PNorm = 80.9540, GNorm = 0.9610, lr_0 = 9.4788e-04
Loss = 2.4365e-01, PNorm = 81.1158, GNorm = 0.9163, lr_0 = 9.5163e-04
Loss = 1.9103e-01, PNorm = 81.2528, GNorm = 0.5721, lr_0 = 9.5538e-04
Loss = 2.1338e-01, PNorm = 81.3900, GNorm = 0.8459, lr_0 = 9.5913e-04
Loss = 2.1243e-01, PNorm = 81.5282, GNorm = 0.7672, lr_0 = 9.6288e-04
Loss = 2.1956e-01, PNorm = 81.6766, GNorm = 0.6312, lr_0 = 9.6663e-04
Loss = 1.7550e-01, PNorm = 81.8108, GNorm = 0.5047, lr_0 = 9.7038e-04
Loss = 2.2365e-01, PNorm = 81.9439, GNorm = 0.9128, lr_0 = 9.7413e-04
Loss = 2.3096e-01, PNorm = 82.0793, GNorm = 0.9557, lr_0 = 9.7788e-04
Loss = 2.0937e-01, PNorm = 82.2210, GNorm = 1.1526, lr_0 = 9.8163e-04
Loss = 1.9691e-01, PNorm = 82.3546, GNorm = 0.8673, lr_0 = 9.8537e-04
Loss = 2.0395e-01, PNorm = 82.4940, GNorm = 0.8245, lr_0 = 9.8912e-04
Loss = 1.9768e-01, PNorm = 82.6191, GNorm = 0.4683, lr_0 = 9.9288e-04
Loss = 2.0428e-01, PNorm = 82.7558, GNorm = 1.2914, lr_0 = 9.9663e-04
Loss = 2.0916e-01, PNorm = 82.9040, GNorm = 0.8036, lr_0 = 9.9993e-04
Validation mae = 0.306886
Epoch 2
Loss = 1.3078e-01, PNorm = 83.0524, GNorm = 0.6123, lr_0 = 9.9925e-04
Loss = 1.2512e-01, PNorm = 83.1831, GNorm = 1.1142, lr_0 = 9.9856e-04
Loss = 1.2560e-01, PNorm = 83.2936, GNorm = 0.6408, lr_0 = 9.9788e-04
Loss = 1.3041e-01, PNorm = 83.4169, GNorm = 0.6456, lr_0 = 9.9719e-04
Loss = 1.1732e-01, PNorm = 83.5249, GNorm = 1.0976, lr_0 = 9.9651e-04
Loss = 1.2625e-01, PNorm = 83.6500, GNorm = 0.6846, lr_0 = 9.9583e-04
Loss = 1.1240e-01, PNorm = 83.7841, GNorm = 0.4233, lr_0 = 9.9515e-04
Loss = 1.4043e-01, PNorm = 83.9101, GNorm = 0.5405, lr_0 = 9.9446e-04
Loss = 1.2415e-01, PNorm = 84.0370, GNorm = 0.8802, lr_0 = 9.9378e-04
Loss = 1.3605e-01, PNorm = 84.1597, GNorm = 0.5540, lr_0 = 9.9310e-04
Loss = 1.3645e-01, PNorm = 84.2802, GNorm = 0.6767, lr_0 = 9.9242e-04
Loss = 1.1898e-01, PNorm = 84.3953, GNorm = 0.6005, lr_0 = 9.9174e-04
Loss = 1.1311e-01, PNorm = 84.5098, GNorm = 0.6920, lr_0 = 9.9106e-04
Loss = 1.3198e-01, PNorm = 84.6291, GNorm = 0.7744, lr_0 = 9.9038e-04
Loss = 1.4702e-01, PNorm = 84.7570, GNorm = 0.6407, lr_0 = 9.8971e-04
Loss = 1.1625e-01, PNorm = 84.8875, GNorm = 0.7241, lr_0 = 9.8903e-04
Loss = 1.2548e-01, PNorm = 85.0146, GNorm = 0.6903, lr_0 = 9.8835e-04
Loss = 1.2909e-01, PNorm = 85.1520, GNorm = 0.5447, lr_0 = 9.8767e-04
Loss = 1.3046e-01, PNorm = 85.2736, GNorm = 0.5926, lr_0 = 9.8700e-04
Loss = 1.1901e-01, PNorm = 85.3957, GNorm = 0.7117, lr_0 = 9.8632e-04
Loss = 1.4177e-01, PNorm = 85.5253, GNorm = 0.7998, lr_0 = 9.8564e-04
Loss = 1.2335e-01, PNorm = 85.6764, GNorm = 0.7632, lr_0 = 9.8497e-04
Loss = 1.2827e-01, PNorm = 85.8139, GNorm = 1.0482, lr_0 = 9.8429e-04
Loss = 1.3682e-01, PNorm = 85.9519, GNorm = 0.7212, lr_0 = 9.8362e-04
Loss = 1.3866e-01, PNorm = 86.0931, GNorm = 1.2833, lr_0 = 9.8295e-04
Loss = 1.2695e-01, PNorm = 86.2295, GNorm = 0.6188, lr_0 = 9.8227e-04
Loss = 1.3947e-01, PNorm = 86.3815, GNorm = 0.4970, lr_0 = 9.8160e-04
Loss = 1.3355e-01, PNorm = 86.5122, GNorm = 0.6028, lr_0 = 9.8093e-04
Loss = 1.2295e-01, PNorm = 86.6575, GNorm = 0.6145, lr_0 = 9.8026e-04
Loss = 1.2346e-01, PNorm = 86.7683, GNorm = 0.9603, lr_0 = 9.7958e-04
Loss = 1.3363e-01, PNorm = 86.8922, GNorm = 0.8514, lr_0 = 9.7891e-04
Loss = 1.3893e-01, PNorm = 87.0093, GNorm = 0.7695, lr_0 = 9.7824e-04
Loss = 1.4178e-01, PNorm = 87.1337, GNorm = 0.5682, lr_0 = 9.7757e-04
Loss = 1.4536e-01, PNorm = 87.2716, GNorm = 0.6065, lr_0 = 9.7690e-04
Loss = 1.3905e-01, PNorm = 87.4048, GNorm = 0.6800, lr_0 = 9.7623e-04
Loss = 1.2903e-01, PNorm = 87.5399, GNorm = 1.1839, lr_0 = 9.7556e-04
Loss = 1.1175e-01, PNorm = 87.6745, GNorm = 0.7362, lr_0 = 9.7490e-04
Loss = 1.4366e-01, PNorm = 87.7957, GNorm = 0.6394, lr_0 = 9.7423e-04
Loss = 1.3529e-01, PNorm = 87.9224, GNorm = 0.7631, lr_0 = 9.7356e-04
Loss = 1.3607e-01, PNorm = 88.0486, GNorm = 1.4225, lr_0 = 9.7289e-04
Loss = 1.3786e-01, PNorm = 88.1754, GNorm = 1.0507, lr_0 = 9.7223e-04
Loss = 1.4201e-01, PNorm = 88.3131, GNorm = 0.5593, lr_0 = 9.7156e-04
Loss = 1.3900e-01, PNorm = 88.4589, GNorm = 0.6402, lr_0 = 9.7090e-04
Loss = 1.3645e-01, PNorm = 88.5925, GNorm = 0.5202, lr_0 = 9.7023e-04
Loss = 1.2862e-01, PNorm = 88.7241, GNorm = 0.6103, lr_0 = 9.6957e-04
Loss = 1.3151e-01, PNorm = 88.8442, GNorm = 0.6712, lr_0 = 9.6890e-04
Loss = 1.3578e-01, PNorm = 88.9810, GNorm = 0.7332, lr_0 = 9.6824e-04
Loss = 1.4686e-01, PNorm = 89.1055, GNorm = 1.1496, lr_0 = 9.6757e-04
Loss = 1.3986e-01, PNorm = 89.2183, GNorm = 1.4271, lr_0 = 9.6691e-04
Loss = 1.3815e-01, PNorm = 89.3492, GNorm = 1.0539, lr_0 = 9.6625e-04
Loss = 1.4907e-01, PNorm = 89.4750, GNorm = 0.8793, lr_0 = 9.6559e-04
Loss = 1.3061e-01, PNorm = 89.6094, GNorm = 0.9764, lr_0 = 9.6493e-04
Loss = 1.3819e-01, PNorm = 89.7333, GNorm = 0.4767, lr_0 = 9.6427e-04
Loss = 1.2933e-01, PNorm = 89.8571, GNorm = 0.6712, lr_0 = 9.6360e-04
Loss = 1.4717e-01, PNorm = 89.9958, GNorm = 0.5475, lr_0 = 9.6294e-04
Loss = 1.3558e-01, PNorm = 90.1256, GNorm = 0.7113, lr_0 = 9.6228e-04
Loss = 1.3515e-01, PNorm = 90.2475, GNorm = 0.6369, lr_0 = 9.6163e-04
Loss = 1.2594e-01, PNorm = 90.3721, GNorm = 0.7510, lr_0 = 9.6097e-04
Loss = 1.2649e-01, PNorm = 90.4917, GNorm = 0.6856, lr_0 = 9.6031e-04
Loss = 1.3571e-01, PNorm = 90.6160, GNorm = 0.5863, lr_0 = 9.5965e-04
Loss = 1.4267e-01, PNorm = 90.7473, GNorm = 0.5412, lr_0 = 9.5899e-04
Loss = 1.2543e-01, PNorm = 90.8667, GNorm = 0.5175, lr_0 = 9.5834e-04
Loss = 1.3883e-01, PNorm = 91.0000, GNorm = 1.1698, lr_0 = 9.5768e-04
Loss = 1.3529e-01, PNorm = 91.1398, GNorm = 0.6878, lr_0 = 9.5702e-04
Loss = 1.2341e-01, PNorm = 91.2859, GNorm = 0.5389, lr_0 = 9.5637e-04
Loss = 1.3226e-01, PNorm = 91.4050, GNorm = 1.0414, lr_0 = 9.5571e-04
Loss = 1.2559e-01, PNorm = 91.5400, GNorm = 0.7467, lr_0 = 9.5506e-04
Loss = 1.2740e-01, PNorm = 91.6632, GNorm = 0.6523, lr_0 = 9.5440e-04
Loss = 1.2481e-01, PNorm = 91.7921, GNorm = 0.6511, lr_0 = 9.5375e-04
Loss = 1.4171e-01, PNorm = 91.9191, GNorm = 0.6307, lr_0 = 9.5310e-04
Loss = 1.3107e-01, PNorm = 92.0304, GNorm = 0.7676, lr_0 = 9.5244e-04
Loss = 1.4542e-01, PNorm = 92.1546, GNorm = 0.5700, lr_0 = 9.5179e-04
Loss = 1.2741e-01, PNorm = 92.2755, GNorm = 0.4755, lr_0 = 9.5114e-04
Loss = 1.3521e-01, PNorm = 92.4153, GNorm = 0.9989, lr_0 = 9.5049e-04
Loss = 1.6028e-01, PNorm = 92.5494, GNorm = 0.6796, lr_0 = 9.4984e-04
Loss = 1.4361e-01, PNorm = 92.6894, GNorm = 0.8051, lr_0 = 9.4919e-04
Loss = 1.4813e-01, PNorm = 92.8270, GNorm = 1.0006, lr_0 = 9.4854e-04
Loss = 1.4342e-01, PNorm = 92.9810, GNorm = 1.1716, lr_0 = 9.4789e-04
Loss = 1.3205e-01, PNorm = 93.1140, GNorm = 0.7250, lr_0 = 9.4724e-04
Loss = 1.4668e-01, PNorm = 93.2522, GNorm = 0.7801, lr_0 = 9.4659e-04
Loss = 1.5643e-01, PNorm = 93.3876, GNorm = 1.1503, lr_0 = 9.4594e-04
Loss = 1.3138e-01, PNorm = 93.5266, GNorm = 0.6143, lr_0 = 9.4529e-04
Loss = 1.3133e-01, PNorm = 93.6606, GNorm = 0.6523, lr_0 = 9.4464e-04
Loss = 1.5579e-01, PNorm = 93.7847, GNorm = 0.4944, lr_0 = 9.4400e-04
Loss = 1.4157e-01, PNorm = 93.9142, GNorm = 0.5535, lr_0 = 9.4335e-04
Loss = 1.2893e-01, PNorm = 94.0362, GNorm = 0.5484, lr_0 = 9.4270e-04
Loss = 1.4811e-01, PNorm = 94.1691, GNorm = 0.6061, lr_0 = 9.4206e-04
Loss = 1.5908e-01, PNorm = 94.3023, GNorm = 1.1573, lr_0 = 9.4141e-04
Loss = 1.4137e-01, PNorm = 94.4507, GNorm = 0.7048, lr_0 = 9.4077e-04
Loss = 1.5260e-01, PNorm = 94.5868, GNorm = 1.0146, lr_0 = 9.4012e-04
Loss = 1.3232e-01, PNorm = 94.7162, GNorm = 0.5509, lr_0 = 9.3948e-04
Loss = 1.2924e-01, PNorm = 94.8318, GNorm = 0.8114, lr_0 = 9.3884e-04
Loss = 1.4278e-01, PNorm = 94.9497, GNorm = 0.4478, lr_0 = 9.3819e-04
Loss = 1.4295e-01, PNorm = 95.0882, GNorm = 0.7280, lr_0 = 9.3755e-04
Loss = 1.5488e-01, PNorm = 95.2040, GNorm = 0.7626, lr_0 = 9.3691e-04
Loss = 1.4586e-01, PNorm = 95.3401, GNorm = 1.0372, lr_0 = 9.3627e-04
Loss = 1.5237e-01, PNorm = 95.4765, GNorm = 0.5103, lr_0 = 9.3562e-04
Loss = 1.3558e-01, PNorm = 95.6136, GNorm = 0.6494, lr_0 = 9.3498e-04
Loss = 1.2030e-01, PNorm = 95.7366, GNorm = 0.6585, lr_0 = 9.3434e-04
Loss = 1.5778e-01, PNorm = 95.8545, GNorm = 0.9408, lr_0 = 9.3370e-04
Loss = 1.3268e-01, PNorm = 95.9811, GNorm = 0.8264, lr_0 = 9.3306e-04
Loss = 1.4965e-01, PNorm = 96.1168, GNorm = 0.8484, lr_0 = 9.3242e-04
Loss = 1.4688e-01, PNorm = 96.2504, GNorm = 0.5496, lr_0 = 9.3178e-04
Loss = 1.5909e-01, PNorm = 96.3749, GNorm = 0.7583, lr_0 = 9.3115e-04
Loss = 1.4988e-01, PNorm = 96.5117, GNorm = 0.5770, lr_0 = 9.3051e-04
Loss = 1.3685e-01, PNorm = 96.6309, GNorm = 0.8709, lr_0 = 9.2987e-04
Loss = 1.3212e-01, PNorm = 96.7614, GNorm = 0.6536, lr_0 = 9.2923e-04
Loss = 1.5081e-01, PNorm = 96.8776, GNorm = 0.5517, lr_0 = 9.2860e-04
Loss = 1.3456e-01, PNorm = 97.0098, GNorm = 0.6940, lr_0 = 9.2796e-04
Loss = 1.4586e-01, PNorm = 97.1072, GNorm = 0.5800, lr_0 = 9.2733e-04
Loss = 1.4893e-01, PNorm = 97.2279, GNorm = 1.1456, lr_0 = 9.2669e-04
Loss = 1.4388e-01, PNorm = 97.3330, GNorm = 0.6346, lr_0 = 9.2606e-04
Loss = 1.3380e-01, PNorm = 97.4548, GNorm = 0.5302, lr_0 = 9.2542e-04
Loss = 1.6049e-01, PNorm = 97.5616, GNorm = 0.6681, lr_0 = 9.2479e-04
Loss = 1.5517e-01, PNorm = 97.6929, GNorm = 0.6746, lr_0 = 9.2415e-04
Loss = 1.5003e-01, PNorm = 97.8235, GNorm = 0.9183, lr_0 = 9.2352e-04
Loss = 1.5265e-01, PNorm = 97.9586, GNorm = 0.8464, lr_0 = 9.2289e-04
Loss = 1.2450e-01, PNorm = 98.0758, GNorm = 0.5894, lr_0 = 9.2226e-04
Loss = 1.3491e-01, PNorm = 98.2040, GNorm = 1.0642, lr_0 = 9.2162e-04
Loss = 1.5302e-01, PNorm = 98.3151, GNorm = 0.7123, lr_0 = 9.2099e-04
Validation mae = 0.297604
Epoch 3
Loss = 1.0471e-01, PNorm = 98.4440, GNorm = 0.3896, lr_0 = 9.2036e-04
Loss = 8.9858e-02, PNorm = 98.5412, GNorm = 0.7625, lr_0 = 9.1973e-04
Loss = 8.5457e-02, PNorm = 98.6387, GNorm = 0.4048, lr_0 = 9.1910e-04
Loss = 8.8049e-02, PNorm = 98.7046, GNorm = 0.6681, lr_0 = 9.1847e-04
Loss = 7.8038e-02, PNorm = 98.7892, GNorm = 0.7018, lr_0 = 9.1784e-04
Loss = 7.9298e-02, PNorm = 98.8605, GNorm = 0.7356, lr_0 = 9.1721e-04
Loss = 9.0534e-02, PNorm = 98.9395, GNorm = 0.6207, lr_0 = 9.1658e-04
Loss = 7.2643e-02, PNorm = 99.0150, GNorm = 0.9294, lr_0 = 9.1596e-04
Loss = 6.7580e-02, PNorm = 99.1026, GNorm = 0.5753, lr_0 = 9.1533e-04
Loss = 7.8490e-02, PNorm = 99.1734, GNorm = 0.5226, lr_0 = 9.1470e-04
Loss = 6.7979e-02, PNorm = 99.2521, GNorm = 0.4166, lr_0 = 9.1408e-04
Loss = 7.8050e-02, PNorm = 99.3226, GNorm = 0.3019, lr_0 = 9.1345e-04
Loss = 6.2945e-02, PNorm = 99.3930, GNorm = 0.8713, lr_0 = 9.1282e-04
Loss = 8.2652e-02, PNorm = 99.4635, GNorm = 0.3692, lr_0 = 9.1220e-04
Loss = 7.4796e-02, PNorm = 99.5414, GNorm = 0.4088, lr_0 = 9.1157e-04
Loss = 8.0503e-02, PNorm = 99.6092, GNorm = 0.5292, lr_0 = 9.1095e-04
Loss = 7.2676e-02, PNorm = 99.6903, GNorm = 0.3448, lr_0 = 9.1032e-04
Loss = 7.0086e-02, PNorm = 99.7583, GNorm = 0.3937, lr_0 = 9.0970e-04
Loss = 7.4069e-02, PNorm = 99.8300, GNorm = 0.4980, lr_0 = 9.0908e-04
Loss = 7.8831e-02, PNorm = 99.8995, GNorm = 0.6362, lr_0 = 9.0846e-04
Loss = 8.0825e-02, PNorm = 99.9598, GNorm = 0.4864, lr_0 = 9.0783e-04
Loss = 8.4406e-02, PNorm = 100.0422, GNorm = 0.4150, lr_0 = 9.0721e-04
Loss = 7.7474e-02, PNorm = 100.1298, GNorm = 0.6030, lr_0 = 9.0659e-04
Loss = 7.9963e-02, PNorm = 100.2150, GNorm = 0.5297, lr_0 = 9.0597e-04
Loss = 9.9511e-02, PNorm = 100.3011, GNorm = 0.5704, lr_0 = 9.0535e-04
Loss = 7.2032e-02, PNorm = 100.3953, GNorm = 1.0291, lr_0 = 9.0473e-04
Loss = 9.8388e-02, PNorm = 100.4862, GNorm = 0.7463, lr_0 = 9.0411e-04
Loss = 9.0138e-02, PNorm = 100.5810, GNorm = 0.5512, lr_0 = 9.0349e-04
Loss = 7.4909e-02, PNorm = 100.6790, GNorm = 0.4165, lr_0 = 9.0287e-04
Loss = 9.2770e-02, PNorm = 100.7666, GNorm = 0.4633, lr_0 = 9.0225e-04
Loss = 7.7921e-02, PNorm = 100.8540, GNorm = 0.6305, lr_0 = 9.0163e-04
Loss = 8.2201e-02, PNorm = 100.9342, GNorm = 0.5723, lr_0 = 9.0102e-04
Loss = 1.0366e-01, PNorm = 101.0281, GNorm = 0.5035, lr_0 = 9.0040e-04
Loss = 8.3983e-02, PNorm = 101.1354, GNorm = 0.5848, lr_0 = 8.9978e-04
Loss = 8.9455e-02, PNorm = 101.2268, GNorm = 0.6538, lr_0 = 8.9916e-04
Loss = 7.8628e-02, PNorm = 101.3219, GNorm = 0.4119, lr_0 = 8.9855e-04
Loss = 8.0330e-02, PNorm = 101.4172, GNorm = 0.4114, lr_0 = 8.9793e-04
Loss = 7.3449e-02, PNorm = 101.4991, GNorm = 0.5873, lr_0 = 8.9732e-04
Loss = 8.8512e-02, PNorm = 101.5901, GNorm = 0.4618, lr_0 = 8.9670e-04
Loss = 8.3506e-02, PNorm = 101.6623, GNorm = 0.4307, lr_0 = 8.9609e-04
Loss = 8.9063e-02, PNorm = 101.7506, GNorm = 0.5459, lr_0 = 8.9548e-04
Loss = 8.1278e-02, PNorm = 101.8297, GNorm = 0.4940, lr_0 = 8.9486e-04
Loss = 8.3378e-02, PNorm = 101.9136, GNorm = 0.3600, lr_0 = 8.9425e-04
Loss = 9.1871e-02, PNorm = 101.9982, GNorm = 0.4186, lr_0 = 8.9364e-04
Loss = 9.0866e-02, PNorm = 102.0882, GNorm = 0.6328, lr_0 = 8.9302e-04
Loss = 8.5739e-02, PNorm = 102.1898, GNorm = 0.8148, lr_0 = 8.9241e-04
Loss = 7.4370e-02, PNorm = 102.2831, GNorm = 0.5917, lr_0 = 8.9180e-04
Loss = 8.3572e-02, PNorm = 102.3733, GNorm = 0.3422, lr_0 = 8.9119e-04
Loss = 8.5580e-02, PNorm = 102.4605, GNorm = 0.6017, lr_0 = 8.9058e-04
Loss = 8.8509e-02, PNorm = 102.5557, GNorm = 0.9438, lr_0 = 8.8997e-04
Loss = 7.7767e-02, PNorm = 102.6472, GNorm = 0.3969, lr_0 = 8.8936e-04
Loss = 8.9157e-02, PNorm = 102.7412, GNorm = 1.5426, lr_0 = 8.8875e-04
Loss = 8.1676e-02, PNorm = 102.8285, GNorm = 0.4862, lr_0 = 8.8814e-04
Loss = 8.9633e-02, PNorm = 102.9087, GNorm = 0.4508, lr_0 = 8.8753e-04
Loss = 9.4791e-02, PNorm = 103.0167, GNorm = 0.3741, lr_0 = 8.8693e-04
Loss = 8.3402e-02, PNorm = 103.1074, GNorm = 0.5738, lr_0 = 8.8632e-04
Loss = 8.4806e-02, PNorm = 103.2019, GNorm = 0.4510, lr_0 = 8.8571e-04
Loss = 8.5818e-02, PNorm = 103.2982, GNorm = 0.4274, lr_0 = 8.8510e-04
Loss = 8.5660e-02, PNorm = 103.3884, GNorm = 0.6961, lr_0 = 8.8450e-04
Loss = 9.3170e-02, PNorm = 103.4868, GNorm = 0.6228, lr_0 = 8.8389e-04
Loss = 9.0272e-02, PNorm = 103.5821, GNorm = 0.7900, lr_0 = 8.8329e-04
Loss = 9.4210e-02, PNorm = 103.6789, GNorm = 0.3709, lr_0 = 8.8268e-04
Loss = 9.6019e-02, PNorm = 103.7705, GNorm = 1.0660, lr_0 = 8.8208e-04
Loss = 1.0743e-01, PNorm = 103.8818, GNorm = 0.5779, lr_0 = 8.8147e-04
Loss = 9.3850e-02, PNorm = 103.9838, GNorm = 0.6385, lr_0 = 8.8087e-04
Loss = 8.6838e-02, PNorm = 104.0867, GNorm = 0.3631, lr_0 = 8.8026e-04
Loss = 9.7967e-02, PNorm = 104.1934, GNorm = 0.6688, lr_0 = 8.7966e-04
Loss = 9.6502e-02, PNorm = 104.3108, GNorm = 0.4206, lr_0 = 8.7906e-04
Loss = 8.7328e-02, PNorm = 104.4157, GNorm = 0.5681, lr_0 = 8.7846e-04
Loss = 8.1654e-02, PNorm = 104.5204, GNorm = 1.0093, lr_0 = 8.7785e-04
Loss = 7.6860e-02, PNorm = 104.6230, GNorm = 0.5454, lr_0 = 8.7725e-04
Loss = 7.7371e-02, PNorm = 104.7178, GNorm = 0.5314, lr_0 = 8.7665e-04
Loss = 8.2305e-02, PNorm = 104.8042, GNorm = 0.4574, lr_0 = 8.7605e-04
Loss = 9.1309e-02, PNorm = 104.9146, GNorm = 0.8618, lr_0 = 8.7545e-04
Loss = 9.0542e-02, PNorm = 105.0227, GNorm = 0.3886, lr_0 = 8.7485e-04
Loss = 8.6003e-02, PNorm = 105.1270, GNorm = 0.5893, lr_0 = 8.7425e-04
Loss = 1.1260e-01, PNorm = 105.2315, GNorm = 0.9551, lr_0 = 8.7365e-04
Loss = 7.9477e-02, PNorm = 105.3415, GNorm = 0.6833, lr_0 = 8.7306e-04
Loss = 8.6855e-02, PNorm = 105.4439, GNorm = 0.7804, lr_0 = 8.7246e-04
Loss = 9.1791e-02, PNorm = 105.5552, GNorm = 0.6967, lr_0 = 8.7186e-04
Loss = 9.8900e-02, PNorm = 105.6577, GNorm = 0.4545, lr_0 = 8.7126e-04
Loss = 9.0787e-02, PNorm = 105.7615, GNorm = 0.6067, lr_0 = 8.7067e-04
Loss = 1.0042e-01, PNorm = 105.8603, GNorm = 0.4952, lr_0 = 8.7007e-04
Loss = 8.2600e-02, PNorm = 105.9612, GNorm = 0.5406, lr_0 = 8.6947e-04
Loss = 7.8040e-02, PNorm = 106.0513, GNorm = 0.8081, lr_0 = 8.6888e-04
Loss = 8.8708e-02, PNorm = 106.1433, GNorm = 0.4665, lr_0 = 8.6828e-04
Loss = 8.2506e-02, PNorm = 106.2348, GNorm = 0.4372, lr_0 = 8.6769e-04
Loss = 7.9136e-02, PNorm = 106.3283, GNorm = 0.5032, lr_0 = 8.6709e-04
Loss = 9.1518e-02, PNorm = 106.4211, GNorm = 0.6190, lr_0 = 8.6650e-04
Loss = 1.0379e-01, PNorm = 106.5241, GNorm = 0.9140, lr_0 = 8.6590e-04
Loss = 1.0372e-01, PNorm = 106.6378, GNorm = 0.7940, lr_0 = 8.6531e-04
Loss = 1.0363e-01, PNorm = 106.7621, GNorm = 0.9689, lr_0 = 8.6472e-04
Loss = 9.2664e-02, PNorm = 106.8724, GNorm = 0.6172, lr_0 = 8.6413e-04
Loss = 1.0549e-01, PNorm = 106.9970, GNorm = 0.6454, lr_0 = 8.6353e-04
Loss = 9.3404e-02, PNorm = 107.0998, GNorm = 0.8843, lr_0 = 8.6294e-04
Loss = 1.0278e-01, PNorm = 107.2144, GNorm = 0.5192, lr_0 = 8.6235e-04
Loss = 9.0809e-02, PNorm = 107.3216, GNorm = 0.5699, lr_0 = 8.6176e-04
Loss = 9.8456e-02, PNorm = 107.4285, GNorm = 1.4685, lr_0 = 8.6117e-04
Loss = 9.7596e-02, PNorm = 107.5441, GNorm = 0.5766, lr_0 = 8.6058e-04
Loss = 9.3894e-02, PNorm = 107.6562, GNorm = 0.4371, lr_0 = 8.5999e-04
Loss = 8.6891e-02, PNorm = 107.7694, GNorm = 0.6118, lr_0 = 8.5940e-04
Loss = 9.3936e-02, PNorm = 107.8686, GNorm = 2.5081, lr_0 = 8.5881e-04
Loss = 9.2976e-02, PNorm = 107.9825, GNorm = 0.9197, lr_0 = 8.5823e-04
Loss = 1.0237e-01, PNorm = 108.0871, GNorm = 0.5294, lr_0 = 8.5764e-04
Loss = 9.2449e-02, PNorm = 108.2078, GNorm = 0.3760, lr_0 = 8.5705e-04
Loss = 9.8292e-02, PNorm = 108.3212, GNorm = 1.1395, lr_0 = 8.5646e-04
Loss = 9.7141e-02, PNorm = 108.4195, GNorm = 0.6474, lr_0 = 8.5588e-04
Loss = 8.7579e-02, PNorm = 108.5258, GNorm = 0.3432, lr_0 = 8.5529e-04
Loss = 9.8941e-02, PNorm = 108.6347, GNorm = 0.4052, lr_0 = 8.5470e-04
Loss = 8.4249e-02, PNorm = 108.7381, GNorm = 0.5116, lr_0 = 8.5412e-04
Loss = 9.1975e-02, PNorm = 108.8481, GNorm = 0.5489, lr_0 = 8.5353e-04
Loss = 8.6482e-02, PNorm = 108.9475, GNorm = 0.4161, lr_0 = 8.5295e-04
Loss = 9.8296e-02, PNorm = 109.0453, GNorm = 0.8045, lr_0 = 8.5236e-04
Loss = 9.1514e-02, PNorm = 109.1472, GNorm = 0.4499, lr_0 = 8.5178e-04
Loss = 9.6980e-02, PNorm = 109.2514, GNorm = 0.9065, lr_0 = 8.5120e-04
Loss = 9.4582e-02, PNorm = 109.3602, GNorm = 0.5020, lr_0 = 8.5061e-04
Loss = 1.0922e-01, PNorm = 109.4620, GNorm = 0.8059, lr_0 = 8.5003e-04
Loss = 1.0734e-01, PNorm = 109.5756, GNorm = 1.1040, lr_0 = 8.4945e-04
Loss = 1.0662e-01, PNorm = 109.6879, GNorm = 0.6531, lr_0 = 8.4887e-04
Loss = 9.3581e-02, PNorm = 109.8038, GNorm = 0.4352, lr_0 = 8.4828e-04
Validation mae = 0.289848
Epoch 4
Loss = 6.6454e-02, PNorm = 109.8943, GNorm = 0.4625, lr_0 = 8.4770e-04
Loss = 6.1551e-02, PNorm = 109.9865, GNorm = 0.4469, lr_0 = 8.4712e-04
Loss = 5.8618e-02, PNorm = 110.0557, GNorm = 0.6530, lr_0 = 8.4654e-04
Loss = 5.7600e-02, PNorm = 110.1336, GNorm = 0.7435, lr_0 = 8.4596e-04
Loss = 5.6819e-02, PNorm = 110.1944, GNorm = 0.4261, lr_0 = 8.4538e-04
Loss = 5.4827e-02, PNorm = 110.2641, GNorm = 0.6503, lr_0 = 8.4480e-04
Loss = 5.3376e-02, PNorm = 110.3333, GNorm = 0.7231, lr_0 = 8.4423e-04
Loss = 5.6517e-02, PNorm = 110.3979, GNorm = 0.3874, lr_0 = 8.4365e-04
Loss = 5.5342e-02, PNorm = 110.4639, GNorm = 0.6138, lr_0 = 8.4307e-04
Loss = 6.3056e-02, PNorm = 110.5338, GNorm = 0.5487, lr_0 = 8.4249e-04
Loss = 5.6157e-02, PNorm = 110.5974, GNorm = 0.7121, lr_0 = 8.4191e-04
Loss = 5.9626e-02, PNorm = 110.6661, GNorm = 0.9862, lr_0 = 8.4134e-04
Loss = 4.9031e-02, PNorm = 110.7341, GNorm = 0.3339, lr_0 = 8.4076e-04
Loss = 4.9134e-02, PNorm = 110.7930, GNorm = 0.3919, lr_0 = 8.4019e-04
Loss = 5.7976e-02, PNorm = 110.8577, GNorm = 0.5174, lr_0 = 8.3961e-04
Loss = 4.6032e-02, PNorm = 110.9199, GNorm = 0.5349, lr_0 = 8.3903e-04
Loss = 5.2010e-02, PNorm = 110.9765, GNorm = 0.3908, lr_0 = 8.3846e-04
Loss = 5.3935e-02, PNorm = 111.0404, GNorm = 0.5693, lr_0 = 8.3789e-04
Loss = 5.8492e-02, PNorm = 111.1113, GNorm = 0.4786, lr_0 = 8.3731e-04
Loss = 4.8029e-02, PNorm = 111.1717, GNorm = 0.3268, lr_0 = 8.3674e-04
Loss = 5.9457e-02, PNorm = 111.2475, GNorm = 1.1334, lr_0 = 8.3616e-04
Loss = 5.7170e-02, PNorm = 111.3125, GNorm = 0.3230, lr_0 = 8.3559e-04
Loss = 4.9867e-02, PNorm = 111.3784, GNorm = 0.5794, lr_0 = 8.3502e-04
Loss = 5.6343e-02, PNorm = 111.4539, GNorm = 0.9107, lr_0 = 8.3445e-04
Loss = 5.7819e-02, PNorm = 111.5175, GNorm = 0.3349, lr_0 = 8.3388e-04
Loss = 6.4284e-02, PNorm = 111.5889, GNorm = 0.4814, lr_0 = 8.3330e-04
Loss = 5.0747e-02, PNorm = 111.6623, GNorm = 0.4456, lr_0 = 8.3273e-04
Loss = 4.7018e-02, PNorm = 111.7293, GNorm = 0.6695, lr_0 = 8.3216e-04
Loss = 5.4679e-02, PNorm = 111.8005, GNorm = 0.3550, lr_0 = 8.3159e-04
Loss = 5.2946e-02, PNorm = 111.8664, GNorm = 0.3611, lr_0 = 8.3102e-04
Loss = 5.3688e-02, PNorm = 111.9329, GNorm = 0.4235, lr_0 = 8.3045e-04
Loss = 4.8910e-02, PNorm = 112.0013, GNorm = 0.5508, lr_0 = 8.2988e-04
Loss = 5.2313e-02, PNorm = 112.0690, GNorm = 0.3265, lr_0 = 8.2932e-04
Loss = 6.0069e-02, PNorm = 112.1492, GNorm = 0.5644, lr_0 = 8.2875e-04
Loss = 5.7684e-02, PNorm = 112.2172, GNorm = 0.4358, lr_0 = 8.2818e-04
Loss = 5.1216e-02, PNorm = 112.2948, GNorm = 0.9125, lr_0 = 8.2761e-04
Loss = 4.5501e-02, PNorm = 112.3665, GNorm = 0.6575, lr_0 = 8.2705e-04
Loss = 5.5084e-02, PNorm = 112.4388, GNorm = 0.4469, lr_0 = 8.2648e-04
Loss = 5.3490e-02, PNorm = 112.5164, GNorm = 0.4338, lr_0 = 8.2591e-04
Loss = 5.3613e-02, PNorm = 112.5927, GNorm = 0.8831, lr_0 = 8.2535e-04
Loss = 4.9348e-02, PNorm = 112.6656, GNorm = 0.3951, lr_0 = 8.2478e-04
Loss = 5.5007e-02, PNorm = 112.7388, GNorm = 0.3357, lr_0 = 8.2422e-04
Loss = 5.0249e-02, PNorm = 112.8111, GNorm = 0.4170, lr_0 = 8.2365e-04
Loss = 5.8867e-02, PNorm = 112.8773, GNorm = 0.2827, lr_0 = 8.2309e-04
Loss = 5.6480e-02, PNorm = 112.9461, GNorm = 0.5310, lr_0 = 8.2252e-04
Loss = 6.1610e-02, PNorm = 113.0120, GNorm = 0.4842, lr_0 = 8.2196e-04
Loss = 5.2512e-02, PNorm = 113.0864, GNorm = 0.3275, lr_0 = 8.2140e-04
Loss = 5.6083e-02, PNorm = 113.1687, GNorm = 0.3253, lr_0 = 8.2084e-04
Loss = 5.5093e-02, PNorm = 113.2466, GNorm = 0.4228, lr_0 = 8.2027e-04
Loss = 6.5961e-02, PNorm = 113.3180, GNorm = 0.7784, lr_0 = 8.1971e-04
Loss = 6.6680e-02, PNorm = 113.4109, GNorm = 0.3293, lr_0 = 8.1915e-04
Loss = 5.8109e-02, PNorm = 113.5043, GNorm = 0.4494, lr_0 = 8.1859e-04
Loss = 5.6371e-02, PNorm = 113.5905, GNorm = 0.4211, lr_0 = 8.1803e-04
Loss = 5.6453e-02, PNorm = 113.6613, GNorm = 0.3335, lr_0 = 8.1747e-04
Loss = 5.9561e-02, PNorm = 113.7461, GNorm = 0.5000, lr_0 = 8.1691e-04
Loss = 5.5880e-02, PNorm = 113.8198, GNorm = 0.3396, lr_0 = 8.1635e-04
Loss = 6.3202e-02, PNorm = 113.8964, GNorm = 0.4354, lr_0 = 8.1579e-04
Loss = 6.4340e-02, PNorm = 113.9751, GNorm = 0.8555, lr_0 = 8.1523e-04
Loss = 5.9784e-02, PNorm = 114.0499, GNorm = 0.9227, lr_0 = 8.1467e-04
Loss = 5.7835e-02, PNorm = 114.1314, GNorm = 0.6461, lr_0 = 8.1411e-04
Loss = 5.6226e-02, PNorm = 114.2075, GNorm = 0.7325, lr_0 = 8.1355e-04
Loss = 6.6313e-02, PNorm = 114.2860, GNorm = 0.3311, lr_0 = 8.1300e-04
Loss = 5.4124e-02, PNorm = 114.3677, GNorm = 0.5355, lr_0 = 8.1244e-04
Loss = 7.1133e-02, PNorm = 114.4480, GNorm = 0.5975, lr_0 = 8.1188e-04
Loss = 5.7786e-02, PNorm = 114.5424, GNorm = 0.8626, lr_0 = 8.1133e-04
Loss = 6.2717e-02, PNorm = 114.6275, GNorm = 0.3439, lr_0 = 8.1077e-04
Loss = 6.0004e-02, PNorm = 114.7142, GNorm = 0.2932, lr_0 = 8.1022e-04
Loss = 5.9062e-02, PNorm = 114.7909, GNorm = 0.9480, lr_0 = 8.0966e-04
Loss = 6.7988e-02, PNorm = 114.8760, GNorm = 0.4488, lr_0 = 8.0911e-04
Loss = 6.2826e-02, PNorm = 114.9547, GNorm = 0.6073, lr_0 = 8.0855e-04
Loss = 5.7152e-02, PNorm = 115.0411, GNorm = 0.6532, lr_0 = 8.0800e-04
Loss = 6.4170e-02, PNorm = 115.1239, GNorm = 0.3199, lr_0 = 8.0745e-04
Loss = 5.9358e-02, PNorm = 115.2147, GNorm = 0.5549, lr_0 = 8.0689e-04
Loss = 5.8418e-02, PNorm = 115.2894, GNorm = 0.9305, lr_0 = 8.0634e-04
Loss = 6.2080e-02, PNorm = 115.3819, GNorm = 0.9463, lr_0 = 8.0579e-04
Loss = 6.4340e-02, PNorm = 115.4622, GNorm = 0.4869, lr_0 = 8.0523e-04
Loss = 5.3601e-02, PNorm = 115.5550, GNorm = 0.5215, lr_0 = 8.0468e-04
Loss = 7.0085e-02, PNorm = 115.6375, GNorm = 0.5811, lr_0 = 8.0413e-04
Loss = 5.9645e-02, PNorm = 115.7299, GNorm = 0.3626, lr_0 = 8.0358e-04
Loss = 6.6559e-02, PNorm = 115.8061, GNorm = 0.8772, lr_0 = 8.0303e-04
Loss = 7.5352e-02, PNorm = 115.9000, GNorm = 1.5374, lr_0 = 8.0248e-04
Loss = 5.8344e-02, PNorm = 115.9791, GNorm = 0.5834, lr_0 = 8.0193e-04
Loss = 6.8747e-02, PNorm = 116.0686, GNorm = 0.8712, lr_0 = 8.0138e-04
Loss = 5.7523e-02, PNorm = 116.1524, GNorm = 0.5239, lr_0 = 8.0083e-04
Loss = 5.8916e-02, PNorm = 116.2379, GNorm = 0.4164, lr_0 = 8.0028e-04
Loss = 6.2808e-02, PNorm = 116.3246, GNorm = 0.5603, lr_0 = 7.9974e-04
Loss = 5.7533e-02, PNorm = 116.4133, GNorm = 0.6619, lr_0 = 7.9919e-04
Loss = 6.4483e-02, PNorm = 116.5142, GNorm = 0.6897, lr_0 = 7.9864e-04
Loss = 7.1674e-02, PNorm = 116.5966, GNorm = 0.7840, lr_0 = 7.9809e-04
Loss = 5.8667e-02, PNorm = 116.6889, GNorm = 0.5766, lr_0 = 7.9755e-04
Loss = 7.5086e-02, PNorm = 116.7768, GNorm = 0.8002, lr_0 = 7.9700e-04
Loss = 6.4200e-02, PNorm = 116.8745, GNorm = 0.5393, lr_0 = 7.9645e-04
Loss = 7.2950e-02, PNorm = 116.9656, GNorm = 0.5398, lr_0 = 7.9591e-04
Loss = 7.1781e-02, PNorm = 117.0650, GNorm = 0.5733, lr_0 = 7.9536e-04
Loss = 7.0223e-02, PNorm = 117.1519, GNorm = 0.9630, lr_0 = 7.9482e-04
Loss = 6.3742e-02, PNorm = 117.2436, GNorm = 0.5180, lr_0 = 7.9427e-04
Loss = 6.9851e-02, PNorm = 117.3344, GNorm = 0.4443, lr_0 = 7.9373e-04
Loss = 6.5889e-02, PNorm = 117.4397, GNorm = 0.5065, lr_0 = 7.9319e-04
Loss = 5.9566e-02, PNorm = 117.5320, GNorm = 0.3767, lr_0 = 7.9264e-04
Loss = 7.3826e-02, PNorm = 117.6300, GNorm = 0.5983, lr_0 = 7.9210e-04
Loss = 6.0783e-02, PNorm = 117.7230, GNorm = 0.4064, lr_0 = 7.9156e-04
Loss = 6.8298e-02, PNorm = 117.8130, GNorm = 0.9481, lr_0 = 7.9101e-04
Loss = 6.1850e-02, PNorm = 117.8983, GNorm = 0.6394, lr_0 = 7.9047e-04
Loss = 6.5906e-02, PNorm = 118.0022, GNorm = 0.7119, lr_0 = 7.8993e-04
Loss = 7.3118e-02, PNorm = 118.0988, GNorm = 0.3720, lr_0 = 7.8939e-04
Loss = 5.9947e-02, PNorm = 118.1889, GNorm = 1.1547, lr_0 = 7.8885e-04
Loss = 6.4712e-02, PNorm = 118.2728, GNorm = 0.5401, lr_0 = 7.8831e-04
Loss = 6.9610e-02, PNorm = 118.3629, GNorm = 0.8323, lr_0 = 7.8777e-04
Loss = 7.2508e-02, PNorm = 118.4600, GNorm = 1.2055, lr_0 = 7.8723e-04
Loss = 7.3075e-02, PNorm = 118.5652, GNorm = 1.1111, lr_0 = 7.8669e-04
Loss = 7.1496e-02, PNorm = 118.6733, GNorm = 0.5055, lr_0 = 7.8615e-04
Loss = 6.7410e-02, PNorm = 118.7748, GNorm = 0.9048, lr_0 = 7.8561e-04
Loss = 7.0066e-02, PNorm = 118.8710, GNorm = 0.6799, lr_0 = 7.8507e-04
Loss = 6.7604e-02, PNorm = 118.9700, GNorm = 0.6985, lr_0 = 7.8454e-04
Loss = 6.5172e-02, PNorm = 119.0710, GNorm = 0.5066, lr_0 = 7.8400e-04
Loss = 6.7449e-02, PNorm = 119.1646, GNorm = 0.6144, lr_0 = 7.8346e-04
Loss = 6.6507e-02, PNorm = 119.2587, GNorm = 0.4142, lr_0 = 7.8293e-04
Loss = 7.1199e-02, PNorm = 119.3527, GNorm = 0.4729, lr_0 = 7.8239e-04
Loss = 6.2191e-02, PNorm = 119.4494, GNorm = 0.3200, lr_0 = 7.8185e-04
Loss = 7.0998e-02, PNorm = 119.5472, GNorm = 0.8510, lr_0 = 7.8132e-04
Validation mae = 0.292286
Epoch 5
Loss = 4.5500e-02, PNorm = 119.6277, GNorm = 0.3962, lr_0 = 7.8078e-04
Loss = 5.0547e-02, PNorm = 119.7034, GNorm = 0.7330, lr_0 = 7.8025e-04
Loss = 4.7931e-02, PNorm = 119.7665, GNorm = 0.3192, lr_0 = 7.7971e-04
Loss = 4.5491e-02, PNorm = 119.8461, GNorm = 0.2559, lr_0 = 7.7918e-04
Loss = 4.4696e-02, PNorm = 119.9103, GNorm = 0.2368, lr_0 = 7.7864e-04
Loss = 3.9182e-02, PNorm = 119.9755, GNorm = 0.7986, lr_0 = 7.7811e-04
Loss = 4.5598e-02, PNorm = 120.0380, GNorm = 0.3879, lr_0 = 7.7758e-04
Loss = 3.8201e-02, PNorm = 120.0982, GNorm = 0.3620, lr_0 = 7.7705e-04
Loss = 3.6633e-02, PNorm = 120.1539, GNorm = 0.2568, lr_0 = 7.7651e-04
Loss = 4.7893e-02, PNorm = 120.2123, GNorm = 0.4256, lr_0 = 7.7598e-04
Loss = 4.0671e-02, PNorm = 120.2641, GNorm = 0.5529, lr_0 = 7.7545e-04
Loss = 5.0500e-02, PNorm = 120.3281, GNorm = 0.8127, lr_0 = 7.7492e-04
Loss = 4.7598e-02, PNorm = 120.3753, GNorm = 0.7109, lr_0 = 7.7439e-04
Loss = 5.0600e-02, PNorm = 120.4350, GNorm = 0.6981, lr_0 = 7.7386e-04
Loss = 4.6293e-02, PNorm = 120.5003, GNorm = 0.2973, lr_0 = 7.7333e-04
Loss = 4.4021e-02, PNorm = 120.5631, GNorm = 0.3892, lr_0 = 7.7280e-04
Loss = 4.7510e-02, PNorm = 120.6303, GNorm = 1.0381, lr_0 = 7.7227e-04
Loss = 4.6559e-02, PNorm = 120.6962, GNorm = 0.2549, lr_0 = 7.7174e-04
Loss = 4.1922e-02, PNorm = 120.7634, GNorm = 0.4008, lr_0 = 7.7121e-04
Loss = 4.2642e-02, PNorm = 120.8220, GNorm = 0.3399, lr_0 = 7.7068e-04
Loss = 4.1107e-02, PNorm = 120.8831, GNorm = 0.6141, lr_0 = 7.7015e-04
Loss = 4.2087e-02, PNorm = 120.9334, GNorm = 0.6007, lr_0 = 7.6963e-04
Loss = 5.5473e-02, PNorm = 120.9927, GNorm = 0.2558, lr_0 = 7.6910e-04
Loss = 4.3874e-02, PNorm = 121.0464, GNorm = 0.4753, lr_0 = 7.6857e-04
Loss = 4.4411e-02, PNorm = 121.1186, GNorm = 0.4826, lr_0 = 7.6805e-04
Loss = 4.5466e-02, PNorm = 121.1762, GNorm = 0.9136, lr_0 = 7.6752e-04
Loss = 4.0720e-02, PNorm = 121.2505, GNorm = 0.6363, lr_0 = 7.6699e-04
Loss = 4.2823e-02, PNorm = 121.3175, GNorm = 1.0852, lr_0 = 7.6647e-04
Loss = 4.0631e-02, PNorm = 121.3750, GNorm = 0.4019, lr_0 = 7.6594e-04
Loss = 3.9639e-02, PNorm = 121.4364, GNorm = 0.2932, lr_0 = 7.6542e-04
Loss = 4.1417e-02, PNorm = 121.4953, GNorm = 0.7279, lr_0 = 7.6489e-04
Loss = 4.3565e-02, PNorm = 121.5592, GNorm = 0.3075, lr_0 = 7.6437e-04
Loss = 4.7955e-02, PNorm = 121.6148, GNorm = 0.3855, lr_0 = 7.6385e-04
Loss = 4.4846e-02, PNorm = 121.6795, GNorm = 0.4135, lr_0 = 7.6332e-04
Loss = 3.8477e-02, PNorm = 121.7334, GNorm = 0.4500, lr_0 = 7.6280e-04
Loss = 5.0281e-02, PNorm = 121.7937, GNorm = 0.5259, lr_0 = 7.6228e-04
Loss = 4.3341e-02, PNorm = 121.8567, GNorm = 0.3404, lr_0 = 7.6176e-04
Loss = 3.4125e-02, PNorm = 121.9256, GNorm = 0.2324, lr_0 = 7.6123e-04
Loss = 4.5702e-02, PNorm = 121.9820, GNorm = 0.3541, lr_0 = 7.6071e-04
Loss = 4.1182e-02, PNorm = 122.0410, GNorm = 0.6020, lr_0 = 7.6019e-04
Loss = 4.2844e-02, PNorm = 122.1055, GNorm = 0.2934, lr_0 = 7.5967e-04
Loss = 4.3909e-02, PNorm = 122.1779, GNorm = 0.3923, lr_0 = 7.5915e-04
Loss = 4.0581e-02, PNorm = 122.2464, GNorm = 0.6131, lr_0 = 7.5863e-04
Loss = 4.7804e-02, PNorm = 122.3130, GNorm = 0.4100, lr_0 = 7.5811e-04
Loss = 4.4355e-02, PNorm = 122.3749, GNorm = 1.1242, lr_0 = 7.5759e-04
Loss = 4.5036e-02, PNorm = 122.4348, GNorm = 0.5998, lr_0 = 7.5707e-04
Loss = 4.3536e-02, PNorm = 122.5004, GNorm = 0.8525, lr_0 = 7.5655e-04
Loss = 4.0616e-02, PNorm = 122.5703, GNorm = 0.5093, lr_0 = 7.5603e-04
Loss = 3.9574e-02, PNorm = 122.6366, GNorm = 0.3740, lr_0 = 7.5552e-04
Loss = 4.2784e-02, PNorm = 122.7018, GNorm = 0.3888, lr_0 = 7.5500e-04
Loss = 4.3682e-02, PNorm = 122.7667, GNorm = 0.4585, lr_0 = 7.5448e-04
Loss = 4.2313e-02, PNorm = 122.8385, GNorm = 0.5264, lr_0 = 7.5397e-04
Loss = 4.2946e-02, PNorm = 122.9102, GNorm = 0.7766, lr_0 = 7.5345e-04
Loss = 4.1856e-02, PNorm = 122.9782, GNorm = 0.3641, lr_0 = 7.5293e-04
Loss = 4.2642e-02, PNorm = 123.0404, GNorm = 0.2892, lr_0 = 7.5242e-04
Loss = 3.8664e-02, PNorm = 123.1112, GNorm = 0.2734, lr_0 = 7.5190e-04
Loss = 4.5481e-02, PNorm = 123.1761, GNorm = 0.4694, lr_0 = 7.5139e-04
Loss = 4.2155e-02, PNorm = 123.2415, GNorm = 0.6924, lr_0 = 7.5087e-04
Loss = 4.6504e-02, PNorm = 123.3012, GNorm = 0.3656, lr_0 = 7.5036e-04
Loss = 4.4581e-02, PNorm = 123.3698, GNorm = 0.7314, lr_0 = 7.4984e-04
Loss = 4.6579e-02, PNorm = 123.4409, GNorm = 0.5007, lr_0 = 7.4933e-04
Loss = 4.8823e-02, PNorm = 123.5204, GNorm = 0.6539, lr_0 = 7.4882e-04
Loss = 3.9935e-02, PNorm = 123.5993, GNorm = 0.6952, lr_0 = 7.4830e-04
Loss = 4.5122e-02, PNorm = 123.6792, GNorm = 0.3666, lr_0 = 7.4779e-04
Loss = 3.7016e-02, PNorm = 123.7430, GNorm = 0.4262, lr_0 = 7.4728e-04
Loss = 3.9127e-02, PNorm = 123.8119, GNorm = 0.3437, lr_0 = 7.4677e-04
Loss = 3.8607e-02, PNorm = 123.8750, GNorm = 0.7615, lr_0 = 7.4625e-04
Loss = 4.2025e-02, PNorm = 123.9418, GNorm = 0.5871, lr_0 = 7.4574e-04
Loss = 4.4517e-02, PNorm = 124.0177, GNorm = 0.3452, lr_0 = 7.4523e-04
Loss = 4.3979e-02, PNorm = 124.0821, GNorm = 0.3849, lr_0 = 7.4472e-04
Loss = 4.0461e-02, PNorm = 124.1586, GNorm = 0.5541, lr_0 = 7.4421e-04
Loss = 4.0658e-02, PNorm = 124.2299, GNorm = 0.3474, lr_0 = 7.4370e-04
Loss = 4.2428e-02, PNorm = 124.3045, GNorm = 0.2979, lr_0 = 7.4319e-04
Loss = 3.7218e-02, PNorm = 124.3763, GNorm = 0.2379, lr_0 = 7.4268e-04
Loss = 4.0630e-02, PNorm = 124.4497, GNorm = 0.5009, lr_0 = 7.4217e-04
Loss = 4.0526e-02, PNorm = 124.5148, GNorm = 0.6362, lr_0 = 7.4167e-04
Loss = 4.5022e-02, PNorm = 124.5966, GNorm = 0.6555, lr_0 = 7.4116e-04
Loss = 3.8997e-02, PNorm = 124.6671, GNorm = 0.9778, lr_0 = 7.4065e-04
Loss = 4.6331e-02, PNorm = 124.7402, GNorm = 0.3835, lr_0 = 7.4014e-04
Loss = 6.0028e-02, PNorm = 124.8127, GNorm = 0.6628, lr_0 = 7.3964e-04
Loss = 4.6608e-02, PNorm = 124.8904, GNorm = 0.4526, lr_0 = 7.3913e-04
Loss = 3.7236e-02, PNorm = 124.9645, GNorm = 0.4191, lr_0 = 7.3862e-04
Loss = 4.9872e-02, PNorm = 125.0458, GNorm = 0.7371, lr_0 = 7.3812e-04
Loss = 4.4678e-02, PNorm = 125.1303, GNorm = 0.3443, lr_0 = 7.3761e-04
Loss = 5.0247e-02, PNorm = 125.2226, GNorm = 0.5139, lr_0 = 7.3711e-04
Loss = 4.3435e-02, PNorm = 125.3014, GNorm = 0.2797, lr_0 = 7.3660e-04
Loss = 4.4455e-02, PNorm = 125.3854, GNorm = 0.6317, lr_0 = 7.3610e-04
Loss = 4.3090e-02, PNorm = 125.4544, GNorm = 0.5605, lr_0 = 7.3559e-04
Loss = 4.3212e-02, PNorm = 125.5234, GNorm = 0.3625, lr_0 = 7.3509e-04
Loss = 4.7150e-02, PNorm = 125.5926, GNorm = 0.2744, lr_0 = 7.3458e-04
Loss = 4.3076e-02, PNorm = 125.6721, GNorm = 0.2779, lr_0 = 7.3408e-04
Loss = 4.3713e-02, PNorm = 125.7407, GNorm = 0.3133, lr_0 = 7.3358e-04
Loss = 4.6444e-02, PNorm = 125.8174, GNorm = 0.2512, lr_0 = 7.3308e-04
Loss = 5.1684e-02, PNorm = 125.8870, GNorm = 0.3091, lr_0 = 7.3257e-04
Loss = 4.4846e-02, PNorm = 125.9713, GNorm = 0.8691, lr_0 = 7.3207e-04
Loss = 5.6231e-02, PNorm = 126.0474, GNorm = 0.5201, lr_0 = 7.3157e-04
Loss = 4.4801e-02, PNorm = 126.1315, GNorm = 0.6219, lr_0 = 7.3107e-04
Loss = 4.8490e-02, PNorm = 126.2021, GNorm = 0.2861, lr_0 = 7.3057e-04
Loss = 4.8233e-02, PNorm = 126.2734, GNorm = 0.3032, lr_0 = 7.3007e-04
Loss = 5.2769e-02, PNorm = 126.3439, GNorm = 0.4852, lr_0 = 7.2957e-04
Loss = 4.6757e-02, PNorm = 126.4287, GNorm = 0.7435, lr_0 = 7.2907e-04
Loss = 4.5384e-02, PNorm = 126.5015, GNorm = 0.5636, lr_0 = 7.2857e-04
Loss = 4.4909e-02, PNorm = 126.5732, GNorm = 0.4576, lr_0 = 7.2807e-04
Loss = 4.4269e-02, PNorm = 126.6478, GNorm = 0.5914, lr_0 = 7.2757e-04
Loss = 4.9662e-02, PNorm = 126.7215, GNorm = 1.0368, lr_0 = 7.2707e-04
Loss = 4.3997e-02, PNorm = 126.7926, GNorm = 0.3573, lr_0 = 7.2657e-04
Loss = 4.6962e-02, PNorm = 126.8668, GNorm = 0.2858, lr_0 = 7.2608e-04
Loss = 4.8496e-02, PNorm = 126.9423, GNorm = 0.4633, lr_0 = 7.2558e-04
Loss = 4.3087e-02, PNorm = 127.0195, GNorm = 0.3416, lr_0 = 7.2508e-04
Loss = 4.4322e-02, PNorm = 127.0924, GNorm = 0.3893, lr_0 = 7.2458e-04
Loss = 4.7945e-02, PNorm = 127.1727, GNorm = 0.3995, lr_0 = 7.2409e-04
Loss = 5.6462e-02, PNorm = 127.2500, GNorm = 0.4054, lr_0 = 7.2359e-04
Loss = 3.4434e-02, PNorm = 127.3281, GNorm = 0.4317, lr_0 = 7.2310e-04
Loss = 4.8839e-02, PNorm = 127.4078, GNorm = 0.8186, lr_0 = 7.2260e-04
Loss = 5.1916e-02, PNorm = 127.4863, GNorm = 0.6952, lr_0 = 7.2211e-04
Loss = 4.1219e-02, PNorm = 127.5655, GNorm = 0.7765, lr_0 = 7.2161e-04
Loss = 5.0476e-02, PNorm = 127.6385, GNorm = 0.3448, lr_0 = 7.2112e-04
Loss = 4.3008e-02, PNorm = 127.7148, GNorm = 0.6815, lr_0 = 7.2062e-04
Loss = 5.3742e-02, PNorm = 127.7928, GNorm = 0.9255, lr_0 = 7.2013e-04
Loss = 4.2465e-02, PNorm = 127.8663, GNorm = 0.8164, lr_0 = 7.1964e-04
Validation mae = 0.290374
Epoch 6
Loss = 3.7384e-02, PNorm = 127.9192, GNorm = 0.3425, lr_0 = 7.1914e-04
Loss = 3.8622e-02, PNorm = 127.9740, GNorm = 0.2550, lr_0 = 7.1865e-04
Loss = 3.2223e-02, PNorm = 128.0232, GNorm = 0.2738, lr_0 = 7.1816e-04
Loss = 3.0173e-02, PNorm = 128.0674, GNorm = 0.4577, lr_0 = 7.1767e-04
Loss = 3.7461e-02, PNorm = 128.1197, GNorm = 0.3260, lr_0 = 7.1717e-04
Loss = 3.1473e-02, PNorm = 128.1674, GNorm = 0.1863, lr_0 = 7.1668e-04
Loss = 3.4263e-02, PNorm = 128.2224, GNorm = 0.7714, lr_0 = 7.1619e-04
Loss = 3.2489e-02, PNorm = 128.2685, GNorm = 0.5952, lr_0 = 7.1570e-04
Loss = 3.2252e-02, PNorm = 128.3230, GNorm = 0.4754, lr_0 = 7.1521e-04
Loss = 3.2036e-02, PNorm = 128.3709, GNorm = 0.4931, lr_0 = 7.1472e-04
Loss = 2.7682e-02, PNorm = 128.4202, GNorm = 0.4973, lr_0 = 7.1423e-04
Loss = 2.8881e-02, PNorm = 128.4593, GNorm = 0.2498, lr_0 = 7.1374e-04
Loss = 3.5974e-02, PNorm = 128.5030, GNorm = 0.5966, lr_0 = 7.1325e-04
Loss = 3.7271e-02, PNorm = 128.5510, GNorm = 1.8264, lr_0 = 7.1277e-04
Loss = 3.1356e-02, PNorm = 128.6012, GNorm = 0.9220, lr_0 = 7.1228e-04
Loss = 3.2804e-02, PNorm = 128.6570, GNorm = 0.7935, lr_0 = 7.1179e-04
Loss = 3.5151e-02, PNorm = 128.7109, GNorm = 0.4028, lr_0 = 7.1130e-04
Loss = 3.9435e-02, PNorm = 128.7661, GNorm = 0.3894, lr_0 = 7.1081e-04
Loss = 3.8717e-02, PNorm = 128.8158, GNorm = 0.4655, lr_0 = 7.1033e-04
Loss = 4.0410e-02, PNorm = 128.8620, GNorm = 0.9000, lr_0 = 7.0984e-04
Loss = 3.1930e-02, PNorm = 128.9123, GNorm = 0.7638, lr_0 = 7.0935e-04
Loss = 3.0987e-02, PNorm = 128.9705, GNorm = 0.2081, lr_0 = 7.0887e-04
Loss = 3.3671e-02, PNorm = 129.0266, GNorm = 0.2765, lr_0 = 7.0838e-04
Loss = 3.1141e-02, PNorm = 129.0762, GNorm = 0.2744, lr_0 = 7.0790e-04
Loss = 3.2872e-02, PNorm = 129.1246, GNorm = 0.2960, lr_0 = 7.0741e-04
Loss = 3.5352e-02, PNorm = 129.1765, GNorm = 0.3391, lr_0 = 7.0693e-04
Loss = 3.2997e-02, PNorm = 129.2281, GNorm = 0.5515, lr_0 = 7.0644e-04
Loss = 2.8287e-02, PNorm = 129.2836, GNorm = 0.5746, lr_0 = 7.0596e-04
Loss = 2.9143e-02, PNorm = 129.3353, GNorm = 0.4293, lr_0 = 7.0548e-04
Loss = 2.5985e-02, PNorm = 129.3870, GNorm = 0.5884, lr_0 = 7.0499e-04
Loss = 3.1043e-02, PNorm = 129.4256, GNorm = 0.3945, lr_0 = 7.0451e-04
Loss = 3.1411e-02, PNorm = 129.4715, GNorm = 0.3671, lr_0 = 7.0403e-04
Loss = 2.8776e-02, PNorm = 129.5201, GNorm = 0.2356, lr_0 = 7.0354e-04
Loss = 2.9586e-02, PNorm = 129.5745, GNorm = 0.6216, lr_0 = 7.0306e-04
Loss = 3.2078e-02, PNorm = 129.6218, GNorm = 0.7108, lr_0 = 7.0258e-04
Loss = 3.0454e-02, PNorm = 129.6737, GNorm = 0.6989, lr_0 = 7.0210e-04
Loss = 3.0012e-02, PNorm = 129.7210, GNorm = 0.4992, lr_0 = 7.0162e-04
Loss = 3.6165e-02, PNorm = 129.7760, GNorm = 0.5521, lr_0 = 7.0114e-04
Loss = 2.8430e-02, PNorm = 129.8302, GNorm = 0.5250, lr_0 = 7.0066e-04
Loss = 3.1945e-02, PNorm = 129.8805, GNorm = 0.2984, lr_0 = 7.0018e-04
Loss = 3.2076e-02, PNorm = 129.9407, GNorm = 0.6682, lr_0 = 6.9970e-04
Loss = 3.8649e-02, PNorm = 129.9924, GNorm = 0.4471, lr_0 = 6.9922e-04
Loss = 3.1873e-02, PNorm = 130.0551, GNorm = 0.2304, lr_0 = 6.9874e-04
Loss = 3.4249e-02, PNorm = 130.1049, GNorm = 0.7895, lr_0 = 6.9826e-04
Loss = 4.1189e-02, PNorm = 130.1675, GNorm = 0.5631, lr_0 = 6.9778e-04
Loss = 3.7505e-02, PNorm = 130.2261, GNorm = 1.0489, lr_0 = 6.9730e-04
Loss = 2.9755e-02, PNorm = 130.2935, GNorm = 0.3584, lr_0 = 6.9683e-04
Loss = 3.3689e-02, PNorm = 130.3534, GNorm = 0.4333, lr_0 = 6.9635e-04
Loss = 3.0415e-02, PNorm = 130.4169, GNorm = 0.8196, lr_0 = 6.9587e-04
Loss = 3.4242e-02, PNorm = 130.4781, GNorm = 0.3814, lr_0 = 6.9540e-04
Loss = 2.8458e-02, PNorm = 130.5325, GNorm = 0.7142, lr_0 = 6.9492e-04
Loss = 3.1477e-02, PNorm = 130.5908, GNorm = 0.5911, lr_0 = 6.9444e-04
Loss = 2.9572e-02, PNorm = 130.6482, GNorm = 0.5696, lr_0 = 6.9397e-04
Loss = 3.1694e-02, PNorm = 130.7070, GNorm = 0.4083, lr_0 = 6.9349e-04
Loss = 3.1370e-02, PNorm = 130.7675, GNorm = 0.5793, lr_0 = 6.9302e-04
Loss = 3.2071e-02, PNorm = 130.8205, GNorm = 0.6819, lr_0 = 6.9254e-04
Loss = 3.1395e-02, PNorm = 130.8703, GNorm = 0.3155, lr_0 = 6.9207e-04
Loss = 3.1773e-02, PNorm = 130.9281, GNorm = 0.2873, lr_0 = 6.9159e-04
Loss = 3.3408e-02, PNorm = 130.9804, GNorm = 0.6515, lr_0 = 6.9112e-04
Loss = 3.3309e-02, PNorm = 131.0476, GNorm = 0.4541, lr_0 = 6.9065e-04
Loss = 3.3374e-02, PNorm = 131.1007, GNorm = 0.4766, lr_0 = 6.9017e-04
Loss = 3.4672e-02, PNorm = 131.1629, GNorm = 1.0033, lr_0 = 6.8970e-04
Loss = 3.2838e-02, PNorm = 131.2195, GNorm = 0.5261, lr_0 = 6.8923e-04
Loss = 3.1969e-02, PNorm = 131.2855, GNorm = 0.2140, lr_0 = 6.8876e-04
Loss = 3.3650e-02, PNorm = 131.3419, GNorm = 0.5485, lr_0 = 6.8828e-04
Loss = 3.0837e-02, PNorm = 131.4044, GNorm = 0.3676, lr_0 = 6.8781e-04
Loss = 3.6868e-02, PNorm = 131.4677, GNorm = 1.2591, lr_0 = 6.8734e-04
Loss = 3.3617e-02, PNorm = 131.5370, GNorm = 0.6066, lr_0 = 6.8687e-04
Loss = 2.9386e-02, PNorm = 131.5961, GNorm = 0.7812, lr_0 = 6.8640e-04
Loss = 3.1770e-02, PNorm = 131.6569, GNorm = 0.4792, lr_0 = 6.8593e-04
Loss = 2.9971e-02, PNorm = 131.7123, GNorm = 0.3419, lr_0 = 6.8546e-04
Loss = 3.2610e-02, PNorm = 131.7689, GNorm = 0.5145, lr_0 = 6.8499e-04
Loss = 3.2136e-02, PNorm = 131.8263, GNorm = 0.2623, lr_0 = 6.8452e-04
Loss = 3.2348e-02, PNorm = 131.8794, GNorm = 0.7648, lr_0 = 6.8405e-04
Loss = 3.1742e-02, PNorm = 131.9403, GNorm = 0.3583, lr_0 = 6.8358e-04
Loss = 2.8250e-02, PNorm = 131.9925, GNorm = 0.2300, lr_0 = 6.8312e-04
Loss = 3.7255e-02, PNorm = 132.0449, GNorm = 0.5103, lr_0 = 6.8265e-04
Loss = 3.5037e-02, PNorm = 132.1111, GNorm = 0.4021, lr_0 = 6.8218e-04
Loss = 3.6702e-02, PNorm = 132.1822, GNorm = 0.2904, lr_0 = 6.8171e-04
Loss = 3.3797e-02, PNorm = 132.2525, GNorm = 0.3105, lr_0 = 6.8125e-04
Loss = 3.1641e-02, PNorm = 132.3129, GNorm = 0.2655, lr_0 = 6.8078e-04
Loss = 3.6346e-02, PNorm = 132.3740, GNorm = 0.4539, lr_0 = 6.8031e-04
Loss = 3.5980e-02, PNorm = 132.4387, GNorm = 0.3263, lr_0 = 6.7985e-04
Loss = 3.5119e-02, PNorm = 132.5055, GNorm = 0.6829, lr_0 = 6.7938e-04
Loss = 3.6158e-02, PNorm = 132.5729, GNorm = 0.7056, lr_0 = 6.7892e-04
Loss = 4.2113e-02, PNorm = 132.6422, GNorm = 0.5536, lr_0 = 6.7845e-04
Loss = 3.2343e-02, PNorm = 132.7082, GNorm = 0.7098, lr_0 = 6.7799e-04
Loss = 2.6944e-02, PNorm = 132.7729, GNorm = 0.2455, lr_0 = 6.7752e-04
Loss = 3.3469e-02, PNorm = 132.8325, GNorm = 0.2581, lr_0 = 6.7706e-04
Loss = 3.4307e-02, PNorm = 132.8980, GNorm = 0.5381, lr_0 = 6.7659e-04
Loss = 3.8267e-02, PNorm = 132.9628, GNorm = 0.3108, lr_0 = 6.7613e-04
Loss = 3.5318e-02, PNorm = 133.0338, GNorm = 0.6796, lr_0 = 6.7567e-04
Loss = 2.9962e-02, PNorm = 133.0986, GNorm = 0.3621, lr_0 = 6.7520e-04
Loss = 3.6124e-02, PNorm = 133.1640, GNorm = 0.4694, lr_0 = 6.7474e-04
Loss = 3.4408e-02, PNorm = 133.2270, GNorm = 0.3743, lr_0 = 6.7428e-04
Loss = 3.8998e-02, PNorm = 133.2925, GNorm = 0.3826, lr_0 = 6.7382e-04
Loss = 4.2557e-02, PNorm = 133.3661, GNorm = 0.2987, lr_0 = 6.7335e-04
Loss = 3.7108e-02, PNorm = 133.4333, GNorm = 0.3312, lr_0 = 6.7289e-04
Loss = 3.7722e-02, PNorm = 133.5007, GNorm = 0.2503, lr_0 = 6.7243e-04
Loss = 3.0926e-02, PNorm = 133.5606, GNorm = 0.3323, lr_0 = 6.7197e-04
Loss = 3.8380e-02, PNorm = 133.6235, GNorm = 0.4721, lr_0 = 6.7151e-04
Loss = 3.2022e-02, PNorm = 133.6905, GNorm = 0.3248, lr_0 = 6.7105e-04
Loss = 3.6707e-02, PNorm = 133.7538, GNorm = 0.2026, lr_0 = 6.7059e-04
Loss = 3.4550e-02, PNorm = 133.8218, GNorm = 0.6980, lr_0 = 6.7013e-04
Loss = 3.4697e-02, PNorm = 133.8909, GNorm = 0.4417, lr_0 = 6.6967e-04
Loss = 4.2319e-02, PNorm = 133.9568, GNorm = 0.5375, lr_0 = 6.6921e-04
Loss = 3.8193e-02, PNorm = 134.0306, GNorm = 0.7977, lr_0 = 6.6876e-04
Loss = 3.6691e-02, PNorm = 134.1093, GNorm = 0.7329, lr_0 = 6.6830e-04
Loss = 3.7520e-02, PNorm = 134.1865, GNorm = 0.3308, lr_0 = 6.6784e-04
Loss = 2.9655e-02, PNorm = 134.2515, GNorm = 0.3024, lr_0 = 6.6738e-04
Loss = 3.1521e-02, PNorm = 134.3131, GNorm = 0.2234, lr_0 = 6.6693e-04
Loss = 3.4190e-02, PNorm = 134.3739, GNorm = 0.9440, lr_0 = 6.6647e-04
Loss = 3.1445e-02, PNorm = 134.4388, GNorm = 0.4104, lr_0 = 6.6601e-04
Loss = 3.7036e-02, PNorm = 134.5071, GNorm = 0.5941, lr_0 = 6.6556e-04
Loss = 3.8109e-02, PNorm = 134.5670, GNorm = 0.2271, lr_0 = 6.6510e-04
Loss = 3.4893e-02, PNorm = 134.6364, GNorm = 0.2917, lr_0 = 6.6464e-04
Loss = 4.5300e-02, PNorm = 134.7029, GNorm = 0.8554, lr_0 = 6.6419e-04
Loss = 3.9850e-02, PNorm = 134.7769, GNorm = 0.7501, lr_0 = 6.6373e-04
Loss = 3.8208e-02, PNorm = 134.8473, GNorm = 0.3494, lr_0 = 6.6328e-04
Loss = 3.9738e-02, PNorm = 134.9239, GNorm = 1.1165, lr_0 = 6.6282e-04
Validation mae = 0.289952
Epoch 7
Loss = 3.1208e-02, PNorm = 134.9913, GNorm = 0.5477, lr_0 = 6.6237e-04
Loss = 3.2972e-02, PNorm = 135.0419, GNorm = 0.5332, lr_0 = 6.6192e-04
Loss = 2.6954e-02, PNorm = 135.0807, GNorm = 0.2976, lr_0 = 6.6146e-04
Loss = 3.1705e-02, PNorm = 135.1345, GNorm = 0.5228, lr_0 = 6.6101e-04
Loss = 2.8805e-02, PNorm = 135.1817, GNorm = 0.3839, lr_0 = 6.6056e-04
Loss = 2.6648e-02, PNorm = 135.2298, GNorm = 0.2717, lr_0 = 6.6011e-04
Loss = 2.6826e-02, PNorm = 135.2722, GNorm = 0.2861, lr_0 = 6.5965e-04
Loss = 2.4204e-02, PNorm = 135.3161, GNorm = 0.2765, lr_0 = 6.5920e-04
Loss = 2.6227e-02, PNorm = 135.3559, GNorm = 0.2083, lr_0 = 6.5875e-04
Loss = 2.8364e-02, PNorm = 135.4020, GNorm = 0.2560, lr_0 = 6.5830e-04
Loss = 2.8250e-02, PNorm = 135.4460, GNorm = 0.2961, lr_0 = 6.5785e-04
Loss = 2.3757e-02, PNorm = 135.4879, GNorm = 0.6129, lr_0 = 6.5740e-04
Loss = 2.5907e-02, PNorm = 135.5329, GNorm = 0.2040, lr_0 = 6.5695e-04
Loss = 2.4543e-02, PNorm = 135.5817, GNorm = 0.3936, lr_0 = 6.5650e-04
Loss = 2.8204e-02, PNorm = 135.6317, GNorm = 0.6333, lr_0 = 6.5605e-04
Loss = 2.4780e-02, PNorm = 135.6714, GNorm = 0.3871, lr_0 = 6.5560e-04
Loss = 2.6874e-02, PNorm = 135.7132, GNorm = 0.2588, lr_0 = 6.5515e-04
Loss = 3.1238e-02, PNorm = 135.7587, GNorm = 0.5860, lr_0 = 6.5470e-04
Loss = 2.5478e-02, PNorm = 135.8014, GNorm = 0.5820, lr_0 = 6.5425e-04
Loss = 2.8288e-02, PNorm = 135.8454, GNorm = 0.4348, lr_0 = 6.5380e-04
Loss = 2.6115e-02, PNorm = 135.8903, GNorm = 0.5524, lr_0 = 6.5335e-04
Loss = 2.7819e-02, PNorm = 135.9316, GNorm = 0.5717, lr_0 = 6.5291e-04
Loss = 2.4271e-02, PNorm = 135.9800, GNorm = 0.3548, lr_0 = 6.5246e-04
Loss = 3.5006e-02, PNorm = 136.0234, GNorm = 0.4746, lr_0 = 6.5201e-04
Loss = 2.5510e-02, PNorm = 136.0749, GNorm = 0.1626, lr_0 = 6.5157e-04
Loss = 2.8231e-02, PNorm = 136.1247, GNorm = 0.6590, lr_0 = 6.5112e-04
Loss = 2.7786e-02, PNorm = 136.1757, GNorm = 0.7252, lr_0 = 6.5067e-04
Loss = 2.5659e-02, PNorm = 136.2296, GNorm = 0.3330, lr_0 = 6.5023e-04
Loss = 2.6243e-02, PNorm = 136.2798, GNorm = 0.2095, lr_0 = 6.4978e-04
Loss = 2.9412e-02, PNorm = 136.3227, GNorm = 0.1928, lr_0 = 6.4934e-04
Loss = 2.6839e-02, PNorm = 136.3747, GNorm = 0.3277, lr_0 = 6.4889e-04
Loss = 2.6935e-02, PNorm = 136.4259, GNorm = 0.2457, lr_0 = 6.4845e-04
Loss = 2.7238e-02, PNorm = 136.4799, GNorm = 0.2089, lr_0 = 6.4800e-04
Loss = 2.7482e-02, PNorm = 136.5349, GNorm = 0.4284, lr_0 = 6.4756e-04
Loss = 2.2843e-02, PNorm = 136.5806, GNorm = 0.2821, lr_0 = 6.4712e-04
Loss = 3.1679e-02, PNorm = 136.6272, GNorm = 0.6393, lr_0 = 6.4667e-04
Loss = 3.0943e-02, PNorm = 136.6732, GNorm = 0.1658, lr_0 = 6.4623e-04
Loss = 2.6750e-02, PNorm = 136.7314, GNorm = 0.3827, lr_0 = 6.4579e-04
Loss = 2.1806e-02, PNorm = 136.7807, GNorm = 0.4417, lr_0 = 6.4534e-04
Loss = 2.1582e-02, PNorm = 136.8314, GNorm = 1.0005, lr_0 = 6.4490e-04
Loss = 2.3332e-02, PNorm = 136.8794, GNorm = 0.3212, lr_0 = 6.4446e-04
Loss = 2.2590e-02, PNorm = 136.9226, GNorm = 0.3284, lr_0 = 6.4402e-04
Loss = 2.5730e-02, PNorm = 136.9661, GNorm = 0.2762, lr_0 = 6.4358e-04
Loss = 2.6989e-02, PNorm = 137.0134, GNorm = 0.4277, lr_0 = 6.4314e-04
Loss = 2.2573e-02, PNorm = 137.0614, GNorm = 0.3742, lr_0 = 6.4270e-04
Loss = 2.8158e-02, PNorm = 137.1175, GNorm = 0.9704, lr_0 = 6.4226e-04
Loss = 2.4128e-02, PNorm = 137.1697, GNorm = 0.5176, lr_0 = 6.4182e-04
Loss = 2.3310e-02, PNorm = 137.2248, GNorm = 0.1813, lr_0 = 6.4138e-04
Loss = 2.5751e-02, PNorm = 137.2746, GNorm = 0.2071, lr_0 = 6.4094e-04
Loss = 2.9514e-02, PNorm = 137.3233, GNorm = 0.4629, lr_0 = 6.4050e-04
Loss = 2.4037e-02, PNorm = 137.3725, GNorm = 0.8242, lr_0 = 6.4006e-04
Loss = 3.0645e-02, PNorm = 137.4189, GNorm = 0.5920, lr_0 = 6.3962e-04
Loss = 2.4970e-02, PNorm = 137.4668, GNorm = 0.5656, lr_0 = 6.3918e-04
Loss = 2.6495e-02, PNorm = 137.5210, GNorm = 0.3586, lr_0 = 6.3874e-04
Loss = 3.0141e-02, PNorm = 137.5772, GNorm = 0.5959, lr_0 = 6.3831e-04
Loss = 2.3795e-02, PNorm = 137.6255, GNorm = 0.4227, lr_0 = 6.3787e-04
Loss = 2.5851e-02, PNorm = 137.6718, GNorm = 0.4129, lr_0 = 6.3743e-04
Loss = 3.0263e-02, PNorm = 137.7185, GNorm = 0.3671, lr_0 = 6.3700e-04
Loss = 2.1954e-02, PNorm = 137.7714, GNorm = 0.2210, lr_0 = 6.3656e-04
Loss = 2.7309e-02, PNorm = 137.8232, GNorm = 0.1975, lr_0 = 6.3612e-04
Loss = 2.4463e-02, PNorm = 137.8786, GNorm = 0.5062, lr_0 = 6.3569e-04
Loss = 2.8045e-02, PNorm = 137.9322, GNorm = 0.2842, lr_0 = 6.3525e-04
Loss = 2.7954e-02, PNorm = 137.9923, GNorm = 0.2874, lr_0 = 6.3482e-04
Loss = 2.6438e-02, PNorm = 138.0492, GNorm = 0.4482, lr_0 = 6.3438e-04
Loss = 2.3629e-02, PNorm = 138.1054, GNorm = 0.4857, lr_0 = 6.3395e-04
Loss = 2.5884e-02, PNorm = 138.1485, GNorm = 0.2519, lr_0 = 6.3351e-04
Loss = 2.5886e-02, PNorm = 138.1951, GNorm = 0.5541, lr_0 = 6.3308e-04
Loss = 2.6684e-02, PNorm = 138.2376, GNorm = 0.4277, lr_0 = 6.3265e-04
Loss = 2.7905e-02, PNorm = 138.2828, GNorm = 0.5817, lr_0 = 6.3221e-04
Loss = 2.7669e-02, PNorm = 138.3362, GNorm = 0.8835, lr_0 = 6.3178e-04
Loss = 2.5518e-02, PNorm = 138.3987, GNorm = 0.4284, lr_0 = 6.3135e-04
Loss = 2.1562e-02, PNorm = 138.4534, GNorm = 0.2704, lr_0 = 6.3091e-04
Loss = 2.8926e-02, PNorm = 138.5077, GNorm = 0.5987, lr_0 = 6.3048e-04
Loss = 2.5436e-02, PNorm = 138.5624, GNorm = 0.4409, lr_0 = 6.3005e-04
Loss = 2.7328e-02, PNorm = 138.6258, GNorm = 1.0086, lr_0 = 6.2962e-04
Loss = 2.5508e-02, PNorm = 138.6829, GNorm = 0.6197, lr_0 = 6.2919e-04
Loss = 2.6324e-02, PNorm = 138.7408, GNorm = 0.3157, lr_0 = 6.2876e-04
Loss = 2.5733e-02, PNorm = 138.7984, GNorm = 0.4305, lr_0 = 6.2833e-04
Loss = 2.6386e-02, PNorm = 138.8451, GNorm = 0.3005, lr_0 = 6.2789e-04
Loss = 2.4811e-02, PNorm = 138.8991, GNorm = 0.7983, lr_0 = 6.2746e-04
Loss = 2.6459e-02, PNorm = 138.9505, GNorm = 0.2879, lr_0 = 6.2703e-04
Loss = 2.4880e-02, PNorm = 139.0049, GNorm = 0.3401, lr_0 = 6.2661e-04
Loss = 2.3737e-02, PNorm = 139.0553, GNorm = 0.3580, lr_0 = 6.2618e-04
Loss = 2.6830e-02, PNorm = 139.1168, GNorm = 0.3065, lr_0 = 6.2575e-04
Loss = 2.7393e-02, PNorm = 139.1760, GNorm = 0.2359, lr_0 = 6.2532e-04
Loss = 2.1108e-02, PNorm = 139.2299, GNorm = 0.3848, lr_0 = 6.2489e-04
Loss = 3.3225e-02, PNorm = 139.2891, GNorm = 0.6736, lr_0 = 6.2446e-04
Loss = 2.4537e-02, PNorm = 139.3437, GNorm = 0.3005, lr_0 = 6.2403e-04
Loss = 2.9401e-02, PNorm = 139.4013, GNorm = 0.2449, lr_0 = 6.2361e-04
Loss = 2.6464e-02, PNorm = 139.4556, GNorm = 0.3158, lr_0 = 6.2318e-04
Loss = 2.5726e-02, PNorm = 139.5101, GNorm = 0.2181, lr_0 = 6.2275e-04
Loss = 2.8019e-02, PNorm = 139.5575, GNorm = 0.5545, lr_0 = 6.2233e-04
Loss = 3.7188e-02, PNorm = 139.6117, GNorm = 0.5622, lr_0 = 6.2190e-04
Loss = 3.0995e-02, PNorm = 139.6697, GNorm = 0.3181, lr_0 = 6.2147e-04
Loss = 2.4901e-02, PNorm = 139.7333, GNorm = 0.3747, lr_0 = 6.2105e-04
Loss = 2.6651e-02, PNorm = 139.7937, GNorm = 0.3527, lr_0 = 6.2062e-04
Loss = 2.6560e-02, PNorm = 139.8518, GNorm = 0.4112, lr_0 = 6.2020e-04
Loss = 3.1011e-02, PNorm = 139.9023, GNorm = 0.3442, lr_0 = 6.1977e-04
Loss = 2.8007e-02, PNorm = 139.9670, GNorm = 0.2231, lr_0 = 6.1935e-04
Loss = 2.7909e-02, PNorm = 140.0176, GNorm = 0.6058, lr_0 = 6.1892e-04
Loss = 3.3388e-02, PNorm = 140.0844, GNorm = 0.5942, lr_0 = 6.1850e-04
Loss = 2.9672e-02, PNorm = 140.1454, GNorm = 0.1890, lr_0 = 6.1808e-04
Loss = 2.9362e-02, PNorm = 140.2111, GNorm = 0.2541, lr_0 = 6.1765e-04
Loss = 2.9765e-02, PNorm = 140.2674, GNorm = 0.7044, lr_0 = 6.1723e-04
Loss = 2.6423e-02, PNorm = 140.3302, GNorm = 0.4398, lr_0 = 6.1681e-04
Loss = 2.5238e-02, PNorm = 140.3906, GNorm = 0.8777, lr_0 = 6.1638e-04
Loss = 2.8859e-02, PNorm = 140.4516, GNorm = 0.6610, lr_0 = 6.1596e-04
Loss = 3.8043e-02, PNorm = 140.5026, GNorm = 0.3461, lr_0 = 6.1554e-04
Loss = 2.9273e-02, PNorm = 140.5604, GNorm = 0.7246, lr_0 = 6.1512e-04
Loss = 3.1041e-02, PNorm = 140.6203, GNorm = 0.3914, lr_0 = 6.1470e-04
Loss = 3.0865e-02, PNorm = 140.6864, GNorm = 0.2043, lr_0 = 6.1428e-04
Loss = 3.1520e-02, PNorm = 140.7465, GNorm = 0.5088, lr_0 = 6.1385e-04
Loss = 2.4864e-02, PNorm = 140.8076, GNorm = 0.4047, lr_0 = 6.1343e-04
Loss = 2.6348e-02, PNorm = 140.8633, GNorm = 0.4273, lr_0 = 6.1301e-04
Loss = 2.6093e-02, PNorm = 140.9205, GNorm = 0.2545, lr_0 = 6.1259e-04
Loss = 2.5738e-02, PNorm = 140.9802, GNorm = 0.1867, lr_0 = 6.1217e-04
Loss = 3.2183e-02, PNorm = 141.0424, GNorm = 0.3329, lr_0 = 6.1175e-04
Loss = 2.9939e-02, PNorm = 141.1015, GNorm = 0.7812, lr_0 = 6.1134e-04
Loss = 2.5731e-02, PNorm = 141.1635, GNorm = 0.3666, lr_0 = 6.1092e-04
Loss = 2.8283e-02, PNorm = 141.2173, GNorm = 0.5710, lr_0 = 6.1050e-04
Validation mae = 0.285716
Epoch 8
Loss = 2.8258e-02, PNorm = 141.2635, GNorm = 0.2920, lr_0 = 6.1008e-04
Loss = 2.2813e-02, PNorm = 141.3125, GNorm = 0.5038, lr_0 = 6.0966e-04
Loss = 2.4185e-02, PNorm = 141.3508, GNorm = 0.5775, lr_0 = 6.0924e-04
Loss = 2.0351e-02, PNorm = 141.3905, GNorm = 0.1792, lr_0 = 6.0883e-04
Loss = 2.1802e-02, PNorm = 141.4337, GNorm = 0.3905, lr_0 = 6.0841e-04
Loss = 2.2668e-02, PNorm = 141.4701, GNorm = 0.6242, lr_0 = 6.0799e-04
Loss = 2.1028e-02, PNorm = 141.5075, GNorm = 0.2046, lr_0 = 6.0758e-04
Loss = 2.4136e-02, PNorm = 141.5421, GNorm = 0.2411, lr_0 = 6.0716e-04
Loss = 2.1219e-02, PNorm = 141.5749, GNorm = 0.2749, lr_0 = 6.0674e-04
Loss = 2.3673e-02, PNorm = 141.6130, GNorm = 0.1422, lr_0 = 6.0633e-04
Loss = 2.2374e-02, PNorm = 141.6512, GNorm = 0.2469, lr_0 = 6.0591e-04
Loss = 2.0190e-02, PNorm = 141.6925, GNorm = 0.2008, lr_0 = 6.0550e-04
Loss = 2.1983e-02, PNorm = 141.7361, GNorm = 0.3408, lr_0 = 6.0508e-04
Loss = 2.2064e-02, PNorm = 141.7772, GNorm = 0.1730, lr_0 = 6.0467e-04
Loss = 2.1621e-02, PNorm = 141.8187, GNorm = 0.4239, lr_0 = 6.0425e-04
Loss = 2.2787e-02, PNorm = 141.8609, GNorm = 0.1615, lr_0 = 6.0384e-04
Loss = 2.2551e-02, PNorm = 141.9025, GNorm = 0.4226, lr_0 = 6.0343e-04
Loss = 2.2874e-02, PNorm = 141.9455, GNorm = 0.3262, lr_0 = 6.0301e-04
Loss = 2.1109e-02, PNorm = 141.9843, GNorm = 0.5688, lr_0 = 6.0260e-04
Loss = 1.8439e-02, PNorm = 142.0192, GNorm = 0.4205, lr_0 = 6.0219e-04
Loss = 2.4795e-02, PNorm = 142.0583, GNorm = 0.1602, lr_0 = 6.0178e-04
Loss = 2.0917e-02, PNorm = 142.0947, GNorm = 0.7168, lr_0 = 6.0136e-04
Loss = 2.1307e-02, PNorm = 142.1316, GNorm = 0.3970, lr_0 = 6.0095e-04
Loss = 2.1490e-02, PNorm = 142.1704, GNorm = 0.1490, lr_0 = 6.0054e-04
Loss = 2.2599e-02, PNorm = 142.2049, GNorm = 0.4561, lr_0 = 6.0013e-04
Loss = 2.1799e-02, PNorm = 142.2495, GNorm = 0.4632, lr_0 = 5.9972e-04
Loss = 2.2750e-02, PNorm = 142.2912, GNorm = 0.2609, lr_0 = 5.9931e-04
Loss = 2.3253e-02, PNorm = 142.3319, GNorm = 0.4285, lr_0 = 5.9890e-04
Loss = 2.0541e-02, PNorm = 142.3754, GNorm = 0.6668, lr_0 = 5.9849e-04
Loss = 2.0521e-02, PNorm = 142.4205, GNorm = 0.4488, lr_0 = 5.9808e-04
Loss = 2.0491e-02, PNorm = 142.4523, GNorm = 0.2881, lr_0 = 5.9767e-04
Loss = 2.7210e-02, PNorm = 142.4882, GNorm = 0.3130, lr_0 = 5.9726e-04
Loss = 1.9665e-02, PNorm = 142.5256, GNorm = 0.2284, lr_0 = 5.9685e-04
Loss = 2.3321e-02, PNorm = 142.5728, GNorm = 0.5145, lr_0 = 5.9644e-04
Loss = 2.0470e-02, PNorm = 142.6163, GNorm = 0.2222, lr_0 = 5.9603e-04
Loss = 2.3149e-02, PNorm = 142.6623, GNorm = 0.8023, lr_0 = 5.9562e-04
Loss = 2.0488e-02, PNorm = 142.7108, GNorm = 0.2978, lr_0 = 5.9521e-04
Loss = 1.6932e-02, PNorm = 142.7535, GNorm = 0.3545, lr_0 = 5.9481e-04
Loss = 1.8718e-02, PNorm = 142.7876, GNorm = 0.5504, lr_0 = 5.9440e-04
Loss = 1.9550e-02, PNorm = 142.8246, GNorm = 0.5341, lr_0 = 5.9399e-04
Loss = 1.9918e-02, PNorm = 142.8562, GNorm = 0.3994, lr_0 = 5.9358e-04
Loss = 1.7551e-02, PNorm = 142.8936, GNorm = 0.1573, lr_0 = 5.9318e-04
Loss = 1.8865e-02, PNorm = 142.9301, GNorm = 0.4375, lr_0 = 5.9277e-04
Loss = 2.2568e-02, PNorm = 142.9656, GNorm = 0.3701, lr_0 = 5.9236e-04
Loss = 1.9233e-02, PNorm = 143.0041, GNorm = 0.3972, lr_0 = 5.9196e-04
Loss = 2.0111e-02, PNorm = 143.0437, GNorm = 0.3361, lr_0 = 5.9155e-04
Loss = 2.1891e-02, PNorm = 143.0826, GNorm = 0.2204, lr_0 = 5.9115e-04
Loss = 2.0618e-02, PNorm = 143.1264, GNorm = 0.5615, lr_0 = 5.9074e-04
Loss = 2.2763e-02, PNorm = 143.1650, GNorm = 0.5114, lr_0 = 5.9034e-04
Loss = 2.4217e-02, PNorm = 143.2038, GNorm = 0.5416, lr_0 = 5.8993e-04
Loss = 2.6411e-02, PNorm = 143.2500, GNorm = 0.4243, lr_0 = 5.8953e-04
Loss = 2.2326e-02, PNorm = 143.2995, GNorm = 0.5167, lr_0 = 5.8913e-04
Loss = 2.1058e-02, PNorm = 143.3433, GNorm = 0.5989, lr_0 = 5.8872e-04
Loss = 2.0042e-02, PNorm = 143.3824, GNorm = 0.1639, lr_0 = 5.8832e-04
Loss = 2.1619e-02, PNorm = 143.4293, GNorm = 0.4295, lr_0 = 5.8792e-04
Loss = 1.9230e-02, PNorm = 143.4724, GNorm = 0.7730, lr_0 = 5.8751e-04
Loss = 2.5322e-02, PNorm = 143.5128, GNorm = 0.4716, lr_0 = 5.8711e-04
Loss = 2.0952e-02, PNorm = 143.5542, GNorm = 0.2874, lr_0 = 5.8671e-04
Loss = 2.4446e-02, PNorm = 143.5948, GNorm = 0.2244, lr_0 = 5.8631e-04
Loss = 2.3145e-02, PNorm = 143.6451, GNorm = 0.6741, lr_0 = 5.8591e-04
Loss = 2.0981e-02, PNorm = 143.6956, GNorm = 0.2684, lr_0 = 5.8550e-04
Loss = 2.1380e-02, PNorm = 143.7431, GNorm = 0.3014, lr_0 = 5.8510e-04
Loss = 2.2589e-02, PNorm = 143.7866, GNorm = 0.2435, lr_0 = 5.8470e-04
Loss = 2.0573e-02, PNorm = 143.8308, GNorm = 0.9443, lr_0 = 5.8430e-04
Loss = 2.3284e-02, PNorm = 143.8781, GNorm = 1.2834, lr_0 = 5.8390e-04
Loss = 2.1946e-02, PNorm = 143.9250, GNorm = 0.3696, lr_0 = 5.8350e-04
Loss = 2.1319e-02, PNorm = 143.9652, GNorm = 0.3720, lr_0 = 5.8310e-04
Loss = 2.3587e-02, PNorm = 144.0100, GNorm = 0.3925, lr_0 = 5.8270e-04
Loss = 1.9939e-02, PNorm = 144.0634, GNorm = 0.3016, lr_0 = 5.8230e-04
Loss = 2.0874e-02, PNorm = 144.1069, GNorm = 0.3345, lr_0 = 5.8190e-04
Loss = 2.3418e-02, PNorm = 144.1576, GNorm = 0.3381, lr_0 = 5.8151e-04
Loss = 2.1210e-02, PNorm = 144.1999, GNorm = 0.4107, lr_0 = 5.8111e-04
Loss = 2.2900e-02, PNorm = 144.2506, GNorm = 0.7860, lr_0 = 5.8071e-04
Loss = 2.1720e-02, PNorm = 144.2966, GNorm = 0.5342, lr_0 = 5.8031e-04
Loss = 1.9084e-02, PNorm = 144.3427, GNorm = 0.2143, lr_0 = 5.7991e-04
Loss = 2.2312e-02, PNorm = 144.3867, GNorm = 0.2805, lr_0 = 5.7952e-04
Loss = 1.8220e-02, PNorm = 144.4247, GNorm = 0.1882, lr_0 = 5.7912e-04
Loss = 2.1730e-02, PNorm = 144.4665, GNorm = 0.2598, lr_0 = 5.7872e-04
Loss = 2.1627e-02, PNorm = 144.5084, GNorm = 0.6697, lr_0 = 5.7833e-04
Loss = 2.2451e-02, PNorm = 144.5575, GNorm = 0.4259, lr_0 = 5.7793e-04
Loss = 2.1399e-02, PNorm = 144.6074, GNorm = 0.1730, lr_0 = 5.7753e-04
Loss = 2.3960e-02, PNorm = 144.6514, GNorm = 0.3321, lr_0 = 5.7714e-04
Loss = 2.2211e-02, PNorm = 144.6936, GNorm = 0.4447, lr_0 = 5.7674e-04
Loss = 2.2933e-02, PNorm = 144.7396, GNorm = 0.3560, lr_0 = 5.7635e-04
Loss = 2.1679e-02, PNorm = 144.7848, GNorm = 0.3048, lr_0 = 5.7595e-04
Loss = 2.5284e-02, PNorm = 144.8254, GNorm = 0.5048, lr_0 = 5.7556e-04
Loss = 2.0479e-02, PNorm = 144.8691, GNorm = 0.2875, lr_0 = 5.7516e-04
Loss = 1.7887e-02, PNorm = 144.9116, GNorm = 0.1687, lr_0 = 5.7477e-04
Loss = 2.6543e-02, PNorm = 144.9584, GNorm = 0.5876, lr_0 = 5.7438e-04
Loss = 2.0495e-02, PNorm = 145.0123, GNorm = 0.4484, lr_0 = 5.7398e-04
Loss = 2.2098e-02, PNorm = 145.0619, GNorm = 0.4039, lr_0 = 5.7359e-04
Loss = 2.0001e-02, PNorm = 145.1120, GNorm = 0.2954, lr_0 = 5.7320e-04
Loss = 1.9929e-02, PNorm = 145.1566, GNorm = 0.5120, lr_0 = 5.7280e-04
Loss = 2.0920e-02, PNorm = 145.2019, GNorm = 0.5283, lr_0 = 5.7241e-04
Loss = 2.2109e-02, PNorm = 145.2439, GNorm = 0.2797, lr_0 = 5.7202e-04
Loss = 2.2482e-02, PNorm = 145.2815, GNorm = 0.6536, lr_0 = 5.7163e-04
Loss = 2.6502e-02, PNorm = 145.3237, GNorm = 0.2862, lr_0 = 5.7124e-04
Loss = 2.2152e-02, PNorm = 145.3654, GNorm = 0.5867, lr_0 = 5.7084e-04
Loss = 2.1672e-02, PNorm = 145.4127, GNorm = 0.5546, lr_0 = 5.7045e-04
Loss = 2.2369e-02, PNorm = 145.4617, GNorm = 0.3730, lr_0 = 5.7006e-04
Loss = 2.0632e-02, PNorm = 145.5092, GNorm = 0.2838, lr_0 = 5.6967e-04
Loss = 1.7666e-02, PNorm = 145.5555, GNorm = 0.4374, lr_0 = 5.6928e-04
Loss = 2.3683e-02, PNorm = 145.5952, GNorm = 0.6127, lr_0 = 5.6889e-04
Loss = 2.2482e-02, PNorm = 145.6456, GNorm = 0.2882, lr_0 = 5.6850e-04
Loss = 2.2447e-02, PNorm = 145.6928, GNorm = 0.6275, lr_0 = 5.6811e-04
Loss = 2.2607e-02, PNorm = 145.7463, GNorm = 0.2348, lr_0 = 5.6772e-04
Loss = 2.4905e-02, PNorm = 145.8008, GNorm = 0.2882, lr_0 = 5.6733e-04
Loss = 1.9922e-02, PNorm = 145.8506, GNorm = 0.6415, lr_0 = 5.6695e-04
Loss = 2.2188e-02, PNorm = 145.8969, GNorm = 0.4462, lr_0 = 5.6656e-04
Loss = 2.5399e-02, PNorm = 145.9469, GNorm = 0.7133, lr_0 = 5.6617e-04
Loss = 2.0983e-02, PNorm = 145.9966, GNorm = 0.7406, lr_0 = 5.6578e-04
Loss = 2.3030e-02, PNorm = 146.0434, GNorm = 0.5296, lr_0 = 5.6539e-04
Loss = 2.3283e-02, PNorm = 146.0834, GNorm = 0.3527, lr_0 = 5.6501e-04
Loss = 3.0884e-02, PNorm = 146.1373, GNorm = 0.2572, lr_0 = 5.6462e-04
Loss = 1.8308e-02, PNorm = 146.1851, GNorm = 0.5081, lr_0 = 5.6423e-04
Loss = 2.2245e-02, PNorm = 146.2323, GNorm = 0.4281, lr_0 = 5.6385e-04
Loss = 1.8192e-02, PNorm = 146.2794, GNorm = 0.2584, lr_0 = 5.6346e-04
Loss = 2.8853e-02, PNorm = 146.3293, GNorm = 0.1664, lr_0 = 5.6307e-04
Loss = 2.3012e-02, PNorm = 146.3813, GNorm = 0.1598, lr_0 = 5.6269e-04
Loss = 2.0327e-02, PNorm = 146.4321, GNorm = 0.7909, lr_0 = 5.6230e-04
Validation mae = 0.284576
Epoch 9
Loss = 1.9475e-02, PNorm = 146.4734, GNorm = 0.4558, lr_0 = 5.6192e-04
Loss = 1.9854e-02, PNorm = 146.5114, GNorm = 0.2829, lr_0 = 5.6153e-04
Loss = 1.8746e-02, PNorm = 146.5424, GNorm = 0.5081, lr_0 = 5.6115e-04
Loss = 2.0218e-02, PNorm = 146.5807, GNorm = 0.4315, lr_0 = 5.6076e-04
Loss = 1.7899e-02, PNorm = 146.6137, GNorm = 0.2397, lr_0 = 5.6038e-04
Loss = 1.7232e-02, PNorm = 146.6423, GNorm = 0.6742, lr_0 = 5.6000e-04
Loss = 1.5565e-02, PNorm = 146.6718, GNorm = 0.4675, lr_0 = 5.5961e-04
Loss = 1.8204e-02, PNorm = 146.6996, GNorm = 0.4042, lr_0 = 5.5923e-04
Loss = 1.7283e-02, PNorm = 146.7246, GNorm = 0.3472, lr_0 = 5.5885e-04
Loss = 1.6702e-02, PNorm = 146.7542, GNorm = 0.1696, lr_0 = 5.5846e-04
Loss = 1.7143e-02, PNorm = 146.7813, GNorm = 0.1645, lr_0 = 5.5808e-04
Loss = 1.9074e-02, PNorm = 146.8111, GNorm = 0.2136, lr_0 = 5.5770e-04
Loss = 1.8227e-02, PNorm = 146.8428, GNorm = 0.2152, lr_0 = 5.5732e-04
Loss = 1.7025e-02, PNorm = 146.8722, GNorm = 0.3312, lr_0 = 5.5693e-04
Loss = 1.9609e-02, PNorm = 146.9060, GNorm = 1.0398, lr_0 = 5.5655e-04
Loss = 2.4561e-02, PNorm = 146.9385, GNorm = 0.2699, lr_0 = 5.5617e-04
Loss = 2.0090e-02, PNorm = 146.9706, GNorm = 0.5047, lr_0 = 5.5579e-04
Loss = 1.6923e-02, PNorm = 147.0050, GNorm = 0.3817, lr_0 = 5.5541e-04
Loss = 1.6102e-02, PNorm = 147.0349, GNorm = 0.4916, lr_0 = 5.5503e-04
Loss = 2.0751e-02, PNorm = 147.0695, GNorm = 0.2459, lr_0 = 5.5465e-04
Loss = 1.9569e-02, PNorm = 147.1049, GNorm = 0.4011, lr_0 = 5.5427e-04
Loss = 1.9013e-02, PNorm = 147.1402, GNorm = 0.2833, lr_0 = 5.5389e-04
Loss = 1.3532e-02, PNorm = 147.1763, GNorm = 0.4759, lr_0 = 5.5351e-04
Loss = 1.7621e-02, PNorm = 147.2077, GNorm = 0.4416, lr_0 = 5.5313e-04
Loss = 1.7611e-02, PNorm = 147.2402, GNorm = 0.3425, lr_0 = 5.5275e-04
Loss = 1.7252e-02, PNorm = 147.2745, GNorm = 0.3404, lr_0 = 5.5237e-04
Loss = 1.6291e-02, PNorm = 147.3077, GNorm = 0.3589, lr_0 = 5.5199e-04
Loss = 1.9335e-02, PNorm = 147.3421, GNorm = 0.5156, lr_0 = 5.5162e-04
Loss = 2.0413e-02, PNorm = 147.3729, GNorm = 0.3653, lr_0 = 5.5124e-04
Loss = 1.8280e-02, PNorm = 147.4081, GNorm = 0.2470, lr_0 = 5.5086e-04
Loss = 1.5856e-02, PNorm = 147.4431, GNorm = 0.1917, lr_0 = 5.5048e-04
Loss = 1.5219e-02, PNorm = 147.4837, GNorm = 0.2680, lr_0 = 5.5011e-04
Loss = 1.7390e-02, PNorm = 147.5199, GNorm = 0.1583, lr_0 = 5.4973e-04
Loss = 1.9624e-02, PNorm = 147.5556, GNorm = 0.5152, lr_0 = 5.4935e-04
Loss = 1.4842e-02, PNorm = 147.5918, GNorm = 0.5126, lr_0 = 5.4898e-04
Loss = 1.8011e-02, PNorm = 147.6264, GNorm = 0.1471, lr_0 = 5.4860e-04
Loss = 1.7901e-02, PNorm = 147.6588, GNorm = 0.1741, lr_0 = 5.4822e-04
Loss = 2.0636e-02, PNorm = 147.6915, GNorm = 0.4607, lr_0 = 5.4785e-04
Loss = 1.5799e-02, PNorm = 147.7306, GNorm = 0.3657, lr_0 = 5.4747e-04
Loss = 1.7954e-02, PNorm = 147.7642, GNorm = 0.1878, lr_0 = 5.4710e-04
Loss = 1.7327e-02, PNorm = 147.8051, GNorm = 0.5363, lr_0 = 5.4672e-04
Loss = 1.7503e-02, PNorm = 147.8438, GNorm = 0.5211, lr_0 = 5.4635e-04
Loss = 1.5948e-02, PNorm = 147.8818, GNorm = 0.3841, lr_0 = 5.4597e-04
Loss = 1.6110e-02, PNorm = 147.9146, GNorm = 0.1764, lr_0 = 5.4560e-04
Loss = 2.0261e-02, PNorm = 147.9467, GNorm = 0.6140, lr_0 = 5.4523e-04
Loss = 1.9463e-02, PNorm = 147.9839, GNorm = 0.3926, lr_0 = 5.4485e-04
Loss = 1.5981e-02, PNorm = 148.0247, GNorm = 0.3505, lr_0 = 5.4448e-04
Loss = 1.7411e-02, PNorm = 148.0644, GNorm = 0.2617, lr_0 = 5.4411e-04
Loss = 1.6190e-02, PNorm = 148.1014, GNorm = 0.3897, lr_0 = 5.4373e-04
Loss = 1.7999e-02, PNorm = 148.1377, GNorm = 0.3859, lr_0 = 5.4336e-04
Loss = 1.7258e-02, PNorm = 148.1717, GNorm = 0.2110, lr_0 = 5.4299e-04
Loss = 1.9392e-02, PNorm = 148.2079, GNorm = 0.1473, lr_0 = 5.4262e-04
Loss = 1.6523e-02, PNorm = 148.2418, GNorm = 0.2704, lr_0 = 5.4225e-04
Loss = 1.8432e-02, PNorm = 148.2773, GNorm = 0.2979, lr_0 = 5.4187e-04
Loss = 1.6927e-02, PNorm = 148.3137, GNorm = 0.2034, lr_0 = 5.4150e-04
Loss = 1.5837e-02, PNorm = 148.3499, GNorm = 0.2856, lr_0 = 5.4113e-04
Loss = 1.8377e-02, PNorm = 148.3870, GNorm = 0.2169, lr_0 = 5.4076e-04
Loss = 1.5655e-02, PNorm = 148.4274, GNorm = 0.2079, lr_0 = 5.4039e-04
Loss = 1.8741e-02, PNorm = 148.4632, GNorm = 0.3301, lr_0 = 5.4002e-04
Loss = 1.7748e-02, PNorm = 148.4967, GNorm = 0.3301, lr_0 = 5.3965e-04
Loss = 1.8729e-02, PNorm = 148.5286, GNorm = 0.7255, lr_0 = 5.3928e-04
Loss = 1.7683e-02, PNorm = 148.5708, GNorm = 0.5564, lr_0 = 5.3891e-04
Loss = 2.0948e-02, PNorm = 148.6131, GNorm = 0.3400, lr_0 = 5.3854e-04
Loss = 1.8961e-02, PNorm = 148.6554, GNorm = 0.7004, lr_0 = 5.3817e-04
Loss = 1.3449e-02, PNorm = 148.6874, GNorm = 0.2231, lr_0 = 5.3781e-04
Loss = 1.7402e-02, PNorm = 148.7187, GNorm = 0.2450, lr_0 = 5.3744e-04
Loss = 1.6969e-02, PNorm = 148.7553, GNorm = 0.2781, lr_0 = 5.3707e-04
Loss = 1.4274e-02, PNorm = 148.7990, GNorm = 0.1927, lr_0 = 5.3670e-04
Loss = 1.7066e-02, PNorm = 148.8362, GNorm = 0.4443, lr_0 = 5.3633e-04
Loss = 1.5523e-02, PNorm = 148.8707, GNorm = 0.3521, lr_0 = 5.3597e-04
Loss = 1.4437e-02, PNorm = 148.9017, GNorm = 0.1213, lr_0 = 5.3560e-04
Loss = 1.6582e-02, PNorm = 148.9352, GNorm = 0.2744, lr_0 = 5.3523e-04
Loss = 1.8897e-02, PNorm = 148.9763, GNorm = 0.2961, lr_0 = 5.3486e-04
Loss = 1.7702e-02, PNorm = 149.0166, GNorm = 0.3425, lr_0 = 5.3450e-04
Loss = 1.9573e-02, PNorm = 149.0520, GNorm = 0.3431, lr_0 = 5.3413e-04
Loss = 1.5416e-02, PNorm = 149.0878, GNorm = 0.1778, lr_0 = 5.3377e-04
Loss = 1.6761e-02, PNorm = 149.1272, GNorm = 0.1944, lr_0 = 5.3340e-04
Loss = 1.5777e-02, PNorm = 149.1687, GNorm = 0.6531, lr_0 = 5.3304e-04
Loss = 1.4978e-02, PNorm = 149.2039, GNorm = 0.2264, lr_0 = 5.3267e-04
Loss = 1.6039e-02, PNorm = 149.2402, GNorm = 0.2446, lr_0 = 5.3231e-04
Loss = 1.4401e-02, PNorm = 149.2785, GNorm = 0.1400, lr_0 = 5.3194e-04
Loss = 1.5841e-02, PNorm = 149.3195, GNorm = 0.3975, lr_0 = 5.3158e-04
Loss = 1.8006e-02, PNorm = 149.3636, GNorm = 0.1335, lr_0 = 5.3121e-04
Loss = 2.1663e-02, PNorm = 149.4012, GNorm = 0.4741, lr_0 = 5.3085e-04
Loss = 1.9862e-02, PNorm = 149.4411, GNorm = 0.1949, lr_0 = 5.3048e-04
Loss = 1.9700e-02, PNorm = 149.4863, GNorm = 0.1686, lr_0 = 5.3012e-04
Loss = 1.5089e-02, PNorm = 149.5279, GNorm = 0.2482, lr_0 = 5.2976e-04
Loss = 1.8606e-02, PNorm = 149.5701, GNorm = 0.2758, lr_0 = 5.2939e-04
Loss = 1.9747e-02, PNorm = 149.6113, GNorm = 0.2017, lr_0 = 5.2903e-04
Loss = 1.7598e-02, PNorm = 149.6534, GNorm = 0.1902, lr_0 = 5.2867e-04
Loss = 2.2338e-02, PNorm = 149.6975, GNorm = 0.5920, lr_0 = 5.2831e-04
Loss = 1.5953e-02, PNorm = 149.7413, GNorm = 0.2617, lr_0 = 5.2795e-04
Loss = 1.5360e-02, PNorm = 149.7777, GNorm = 0.3438, lr_0 = 5.2758e-04
Loss = 1.6774e-02, PNorm = 149.8160, GNorm = 0.3595, lr_0 = 5.2722e-04
Loss = 1.5864e-02, PNorm = 149.8535, GNorm = 0.2994, lr_0 = 5.2686e-04
Loss = 1.4826e-02, PNorm = 149.8921, GNorm = 0.2033, lr_0 = 5.2650e-04
Loss = 1.4283e-02, PNorm = 149.9237, GNorm = 0.3306, lr_0 = 5.2614e-04
Loss = 1.6649e-02, PNorm = 149.9606, GNorm = 0.2514, lr_0 = 5.2578e-04
Loss = 1.7607e-02, PNorm = 149.9969, GNorm = 0.7494, lr_0 = 5.2542e-04
Loss = 1.4813e-02, PNorm = 150.0280, GNorm = 0.4460, lr_0 = 5.2506e-04
Loss = 2.1267e-02, PNorm = 150.0581, GNorm = 0.6908, lr_0 = 5.2470e-04
Loss = 1.7423e-02, PNorm = 150.1020, GNorm = 0.3643, lr_0 = 5.2434e-04
Loss = 2.5657e-02, PNorm = 150.1398, GNorm = 0.1566, lr_0 = 5.2398e-04
Loss = 1.6714e-02, PNorm = 150.1816, GNorm = 0.3673, lr_0 = 5.2362e-04
Loss = 1.6194e-02, PNorm = 150.2199, GNorm = 0.3718, lr_0 = 5.2326e-04
Loss = 1.8259e-02, PNorm = 150.2617, GNorm = 0.2200, lr_0 = 5.2290e-04
Loss = 1.5083e-02, PNorm = 150.2980, GNorm = 0.3081, lr_0 = 5.2255e-04
Loss = 1.7795e-02, PNorm = 150.3334, GNorm = 0.4318, lr_0 = 5.2219e-04
Loss = 1.7634e-02, PNorm = 150.3699, GNorm = 0.3577, lr_0 = 5.2183e-04
Loss = 1.6598e-02, PNorm = 150.4090, GNorm = 0.1520, lr_0 = 5.2147e-04
Loss = 1.8100e-02, PNorm = 150.4519, GNorm = 0.5132, lr_0 = 5.2112e-04
Loss = 1.7058e-02, PNorm = 150.4932, GNorm = 0.2427, lr_0 = 5.2076e-04
Loss = 1.9756e-02, PNorm = 150.5397, GNorm = 0.3125, lr_0 = 5.2040e-04
Loss = 1.9945e-02, PNorm = 150.5865, GNorm = 0.2784, lr_0 = 5.2005e-04
Loss = 1.7701e-02, PNorm = 150.6350, GNorm = 0.4221, lr_0 = 5.1969e-04
Loss = 1.6787e-02, PNorm = 150.6738, GNorm = 0.2491, lr_0 = 5.1933e-04
Loss = 1.9732e-02, PNorm = 150.7228, GNorm = 0.3941, lr_0 = 5.1898e-04
Loss = 1.6896e-02, PNorm = 150.7652, GNorm = 0.2279, lr_0 = 5.1862e-04
Loss = 1.6142e-02, PNorm = 150.8032, GNorm = 0.5244, lr_0 = 5.1827e-04
Loss = 1.5803e-02, PNorm = 150.8364, GNorm = 0.2068, lr_0 = 5.1791e-04
Validation mae = 0.281920
Epoch 10
Loss = 1.6690e-02, PNorm = 150.8636, GNorm = 0.4416, lr_0 = 5.1756e-04
Loss = 1.5612e-02, PNorm = 150.8924, GNorm = 0.1465, lr_0 = 5.1720e-04
Loss = 1.7229e-02, PNorm = 150.9224, GNorm = 0.2623, lr_0 = 5.1685e-04
Loss = 1.8773e-02, PNorm = 150.9512, GNorm = 0.3549, lr_0 = 5.1649e-04
Loss = 1.6340e-02, PNorm = 150.9762, GNorm = 0.2061, lr_0 = 5.1614e-04
Loss = 1.5478e-02, PNorm = 151.0099, GNorm = 0.4753, lr_0 = 5.1579e-04
Loss = 1.4885e-02, PNorm = 151.0412, GNorm = 0.2926, lr_0 = 5.1543e-04
Loss = 1.6932e-02, PNorm = 151.0665, GNorm = 0.2665, lr_0 = 5.1508e-04
Loss = 1.7224e-02, PNorm = 151.0920, GNorm = 0.2994, lr_0 = 5.1473e-04
Loss = 1.5153e-02, PNorm = 151.1217, GNorm = 0.2492, lr_0 = 5.1437e-04
Loss = 1.6142e-02, PNorm = 151.1509, GNorm = 0.1772, lr_0 = 5.1402e-04
Loss = 1.3255e-02, PNorm = 151.1820, GNorm = 0.2751, lr_0 = 5.1367e-04
Loss = 1.5257e-02, PNorm = 151.2059, GNorm = 0.3164, lr_0 = 5.1332e-04
Loss = 1.6191e-02, PNorm = 151.2317, GNorm = 0.7442, lr_0 = 5.1297e-04
Loss = 1.6490e-02, PNorm = 151.2556, GNorm = 0.4260, lr_0 = 5.1262e-04
Loss = 1.4936e-02, PNorm = 151.2883, GNorm = 0.2469, lr_0 = 5.1226e-04
Loss = 1.2683e-02, PNorm = 151.3176, GNorm = 0.1359, lr_0 = 5.1191e-04
Loss = 1.4954e-02, PNorm = 151.3450, GNorm = 0.7294, lr_0 = 5.1156e-04
Loss = 1.3756e-02, PNorm = 151.3721, GNorm = 0.5367, lr_0 = 5.1121e-04
Loss = 1.6095e-02, PNorm = 151.3954, GNorm = 0.2414, lr_0 = 5.1086e-04
Loss = 1.3440e-02, PNorm = 151.4175, GNorm = 0.1541, lr_0 = 5.1051e-04
Loss = 1.2847e-02, PNorm = 151.4473, GNorm = 0.3468, lr_0 = 5.1016e-04
Loss = 1.4343e-02, PNorm = 151.4761, GNorm = 0.4710, lr_0 = 5.0981e-04
Loss = 1.3672e-02, PNorm = 151.5057, GNorm = 0.1721, lr_0 = 5.0946e-04
Loss = 1.2361e-02, PNorm = 151.5351, GNorm = 0.1778, lr_0 = 5.0911e-04
Loss = 1.1923e-02, PNorm = 151.5636, GNorm = 0.0940, lr_0 = 5.0877e-04
Loss = 1.0889e-02, PNorm = 151.5900, GNorm = 0.1706, lr_0 = 5.0842e-04
Loss = 1.2850e-02, PNorm = 151.6115, GNorm = 0.4019, lr_0 = 5.0807e-04
Loss = 1.3258e-02, PNorm = 151.6383, GNorm = 0.3257, lr_0 = 5.0772e-04
Loss = 1.2012e-02, PNorm = 151.6620, GNorm = 0.2218, lr_0 = 5.0737e-04
Loss = 1.2850e-02, PNorm = 151.6859, GNorm = 0.6507, lr_0 = 5.0703e-04
Loss = 1.4995e-02, PNorm = 151.7154, GNorm = 0.1472, lr_0 = 5.0668e-04
Loss = 1.6645e-02, PNorm = 151.7509, GNorm = 0.1928, lr_0 = 5.0633e-04
Loss = 1.3200e-02, PNorm = 151.7826, GNorm = 0.3139, lr_0 = 5.0598e-04
Loss = 1.5742e-02, PNorm = 151.8128, GNorm = 0.4066, lr_0 = 5.0564e-04
Loss = 1.3058e-02, PNorm = 151.8426, GNorm = 0.3729, lr_0 = 5.0529e-04
Loss = 1.0229e-02, PNorm = 151.8717, GNorm = 0.3971, lr_0 = 5.0494e-04
Loss = 1.4116e-02, PNorm = 151.8975, GNorm = 0.4169, lr_0 = 5.0460e-04
Loss = 1.6407e-02, PNorm = 151.9271, GNorm = 0.2573, lr_0 = 5.0425e-04
Loss = 1.3928e-02, PNorm = 151.9510, GNorm = 0.2454, lr_0 = 5.0391e-04
Loss = 1.6937e-02, PNorm = 151.9815, GNorm = 0.3834, lr_0 = 5.0356e-04
Loss = 1.1223e-02, PNorm = 152.0129, GNorm = 0.1035, lr_0 = 5.0322e-04
Loss = 1.3306e-02, PNorm = 152.0434, GNorm = 0.3748, lr_0 = 5.0287e-04
Loss = 1.1940e-02, PNorm = 152.0729, GNorm = 0.2543, lr_0 = 5.0253e-04
Loss = 1.4241e-02, PNorm = 152.1004, GNorm = 0.2303, lr_0 = 5.0218e-04
Loss = 1.5550e-02, PNorm = 152.1307, GNorm = 0.2144, lr_0 = 5.0184e-04
Loss = 1.4189e-02, PNorm = 152.1619, GNorm = 0.1387, lr_0 = 5.0150e-04
Loss = 1.4948e-02, PNorm = 152.1928, GNorm = 0.2086, lr_0 = 5.0115e-04
Loss = 1.2708e-02, PNorm = 152.2214, GNorm = 0.2740, lr_0 = 5.0081e-04
Loss = 1.2739e-02, PNorm = 152.2511, GNorm = 0.1832, lr_0 = 5.0047e-04
Loss = 1.4460e-02, PNorm = 152.2815, GNorm = 0.3417, lr_0 = 5.0012e-04
Loss = 1.3659e-02, PNorm = 152.3146, GNorm = 0.2497, lr_0 = 4.9978e-04
Loss = 1.6375e-02, PNorm = 152.3431, GNorm = 0.1258, lr_0 = 4.9944e-04
Loss = 1.3035e-02, PNorm = 152.3769, GNorm = 0.1558, lr_0 = 4.9910e-04
Loss = 1.6175e-02, PNorm = 152.4094, GNorm = 0.5356, lr_0 = 4.9875e-04
Loss = 1.6358e-02, PNorm = 152.4371, GNorm = 0.1848, lr_0 = 4.9841e-04
Loss = 1.4155e-02, PNorm = 152.4634, GNorm = 0.2024, lr_0 = 4.9807e-04
Loss = 1.7270e-02, PNorm = 152.4947, GNorm = 0.1558, lr_0 = 4.9773e-04
Loss = 1.4983e-02, PNorm = 152.5254, GNorm = 0.7588, lr_0 = 4.9739e-04
Loss = 1.4341e-02, PNorm = 152.5593, GNorm = 0.7751, lr_0 = 4.9705e-04
Loss = 1.5141e-02, PNorm = 152.5912, GNorm = 0.5281, lr_0 = 4.9671e-04
Loss = 1.4959e-02, PNorm = 152.6237, GNorm = 0.3750, lr_0 = 4.9637e-04
Loss = 1.2910e-02, PNorm = 152.6557, GNorm = 0.9136, lr_0 = 4.9603e-04
Loss = 1.2685e-02, PNorm = 152.6866, GNorm = 0.5136, lr_0 = 4.9569e-04
Loss = 2.5418e-02, PNorm = 152.7201, GNorm = 0.3128, lr_0 = 4.9535e-04
Loss = 1.3856e-02, PNorm = 152.7573, GNorm = 0.3674, lr_0 = 4.9501e-04
Loss = 1.5048e-02, PNorm = 152.7962, GNorm = 0.6169, lr_0 = 4.9467e-04
Loss = 1.6578e-02, PNorm = 152.8306, GNorm = 0.6662, lr_0 = 4.9433e-04
Loss = 1.4844e-02, PNorm = 152.8610, GNorm = 0.1908, lr_0 = 4.9399e-04
Loss = 1.3307e-02, PNorm = 152.8925, GNorm = 0.2240, lr_0 = 4.9365e-04
Loss = 1.5814e-02, PNorm = 152.9265, GNorm = 0.5087, lr_0 = 4.9332e-04
Loss = 1.2204e-02, PNorm = 152.9592, GNorm = 0.1733, lr_0 = 4.9298e-04
Loss = 1.5630e-02, PNorm = 152.9918, GNorm = 0.2893, lr_0 = 4.9264e-04
Loss = 1.3684e-02, PNorm = 153.0230, GNorm = 0.4788, lr_0 = 4.9230e-04
Loss = 1.3613e-02, PNorm = 153.0540, GNorm = 0.4327, lr_0 = 4.9197e-04
Loss = 1.3183e-02, PNorm = 153.0860, GNorm = 0.2858, lr_0 = 4.9163e-04
Loss = 1.6493e-02, PNorm = 153.1237, GNorm = 0.1846, lr_0 = 4.9129e-04
Loss = 1.2803e-02, PNorm = 153.1573, GNorm = 0.5929, lr_0 = 4.9095e-04
Loss = 1.2733e-02, PNorm = 153.1901, GNorm = 0.2132, lr_0 = 4.9062e-04
Loss = 1.3773e-02, PNorm = 153.2182, GNorm = 0.6005, lr_0 = 4.9028e-04
Loss = 1.3504e-02, PNorm = 153.2467, GNorm = 0.3092, lr_0 = 4.8995e-04
Loss = 1.5036e-02, PNorm = 153.2757, GNorm = 0.8091, lr_0 = 4.8961e-04
Loss = 1.3522e-02, PNorm = 153.3079, GNorm = 0.3793, lr_0 = 4.8928e-04
Loss = 1.4236e-02, PNorm = 153.3452, GNorm = 0.3091, lr_0 = 4.8894e-04
Loss = 1.3668e-02, PNorm = 153.3741, GNorm = 0.3342, lr_0 = 4.8861e-04
Loss = 1.7173e-02, PNorm = 153.4076, GNorm = 0.4036, lr_0 = 4.8827e-04
Loss = 1.2987e-02, PNorm = 153.4364, GNorm = 0.2905, lr_0 = 4.8794e-04
Loss = 1.2682e-02, PNorm = 153.4671, GNorm = 0.2787, lr_0 = 4.8760e-04
Loss = 1.2643e-02, PNorm = 153.5010, GNorm = 0.3316, lr_0 = 4.8727e-04
Loss = 1.4130e-02, PNorm = 153.5316, GNorm = 0.4649, lr_0 = 4.8693e-04
Loss = 1.3232e-02, PNorm = 153.5641, GNorm = 0.4446, lr_0 = 4.8660e-04
Loss = 1.4782e-02, PNorm = 153.6030, GNorm = 0.4068, lr_0 = 4.8627e-04
Loss = 1.4521e-02, PNorm = 153.6359, GNorm = 0.1764, lr_0 = 4.8593e-04
Loss = 1.4172e-02, PNorm = 153.6703, GNorm = 0.6598, lr_0 = 4.8560e-04
Loss = 1.3423e-02, PNorm = 153.7066, GNorm = 0.2341, lr_0 = 4.8527e-04
Loss = 1.3671e-02, PNorm = 153.7387, GNorm = 0.3610, lr_0 = 4.8494e-04
Loss = 1.2982e-02, PNorm = 153.7676, GNorm = 0.3993, lr_0 = 4.8460e-04
Loss = 1.3904e-02, PNorm = 153.7975, GNorm = 0.1986, lr_0 = 4.8427e-04
Loss = 1.6504e-02, PNorm = 153.8302, GNorm = 0.5711, lr_0 = 4.8394e-04
Loss = 1.4036e-02, PNorm = 153.8612, GNorm = 0.1984, lr_0 = 4.8361e-04
Loss = 1.2278e-02, PNorm = 153.8944, GNorm = 0.2587, lr_0 = 4.8328e-04
Loss = 1.4518e-02, PNorm = 153.9267, GNorm = 0.2258, lr_0 = 4.8295e-04
Loss = 1.7674e-02, PNorm = 153.9567, GNorm = 0.2496, lr_0 = 4.8262e-04
Loss = 1.1886e-02, PNorm = 153.9898, GNorm = 0.1722, lr_0 = 4.8228e-04
Loss = 1.2734e-02, PNorm = 154.0162, GNorm = 0.3223, lr_0 = 4.8195e-04
Loss = 1.3693e-02, PNorm = 154.0456, GNorm = 0.1022, lr_0 = 4.8162e-04
Loss = 1.3641e-02, PNorm = 154.0778, GNorm = 0.1891, lr_0 = 4.8129e-04
Loss = 1.5367e-02, PNorm = 154.1118, GNorm = 0.4403, lr_0 = 4.8096e-04
Loss = 1.2166e-02, PNorm = 154.1446, GNorm = 0.2238, lr_0 = 4.8064e-04
Loss = 1.7102e-02, PNorm = 154.1790, GNorm = 0.4235, lr_0 = 4.8031e-04
Loss = 1.4679e-02, PNorm = 154.2183, GNorm = 0.3613, lr_0 = 4.7998e-04
Loss = 1.3072e-02, PNorm = 154.2515, GNorm = 0.2460, lr_0 = 4.7965e-04
Loss = 1.7155e-02, PNorm = 154.2835, GNorm = 0.2557, lr_0 = 4.7932e-04
Loss = 1.3929e-02, PNorm = 154.3167, GNorm = 0.4301, lr_0 = 4.7899e-04
Loss = 1.3040e-02, PNorm = 154.3516, GNorm = 0.2536, lr_0 = 4.7866e-04
Loss = 1.4139e-02, PNorm = 154.3827, GNorm = 0.2141, lr_0 = 4.7833e-04
Loss = 1.3844e-02, PNorm = 154.4140, GNorm = 0.4098, lr_0 = 4.7801e-04
Loss = 1.4728e-02, PNorm = 154.4462, GNorm = 0.3326, lr_0 = 4.7768e-04
Loss = 1.6498e-02, PNorm = 154.4775, GNorm = 0.3794, lr_0 = 4.7735e-04
Loss = 1.3047e-02, PNorm = 154.5150, GNorm = 0.4360, lr_0 = 4.7703e-04
Validation mae = 0.280610
Epoch 11
Loss = 1.2105e-02, PNorm = 154.5420, GNorm = 0.4616, lr_0 = 4.7670e-04
Loss = 1.1610e-02, PNorm = 154.5667, GNorm = 0.3407, lr_0 = 4.7637e-04
Loss = 1.3737e-02, PNorm = 154.5889, GNorm = 0.3683, lr_0 = 4.7605e-04
Loss = 1.1236e-02, PNorm = 154.6104, GNorm = 0.1449, lr_0 = 4.7572e-04
Loss = 1.1918e-02, PNorm = 154.6301, GNorm = 0.1625, lr_0 = 4.7539e-04
Loss = 1.4581e-02, PNorm = 154.6540, GNorm = 0.3493, lr_0 = 4.7507e-04
Loss = 1.2573e-02, PNorm = 154.6842, GNorm = 0.2969, lr_0 = 4.7474e-04
Loss = 1.6418e-02, PNorm = 154.7014, GNorm = 0.3156, lr_0 = 4.7442e-04
Loss = 1.1555e-02, PNorm = 154.7264, GNorm = 0.3340, lr_0 = 4.7409e-04
Loss = 1.2220e-02, PNorm = 154.7442, GNorm = 0.3697, lr_0 = 4.7377e-04
Loss = 1.3199e-02, PNorm = 154.7694, GNorm = 0.4002, lr_0 = 4.7344e-04
Loss = 1.5149e-02, PNorm = 154.7956, GNorm = 0.4518, lr_0 = 4.7312e-04
Loss = 1.1140e-02, PNorm = 154.8257, GNorm = 0.2711, lr_0 = 4.7279e-04
Loss = 1.2828e-02, PNorm = 154.8517, GNorm = 0.2276, lr_0 = 4.7247e-04
Loss = 1.0128e-02, PNorm = 154.8764, GNorm = 0.2792, lr_0 = 4.7215e-04
Loss = 1.1510e-02, PNorm = 154.9008, GNorm = 0.4186, lr_0 = 4.7182e-04
Loss = 1.2327e-02, PNorm = 154.9244, GNorm = 0.2285, lr_0 = 4.7150e-04
Loss = 1.1057e-02, PNorm = 154.9454, GNorm = 0.3126, lr_0 = 4.7118e-04
Loss = 1.0779e-02, PNorm = 154.9688, GNorm = 0.1774, lr_0 = 4.7085e-04
Loss = 1.0131e-02, PNorm = 154.9918, GNorm = 0.1278, lr_0 = 4.7053e-04
Loss = 1.2098e-02, PNorm = 155.0138, GNorm = 0.5917, lr_0 = 4.7021e-04
Loss = 1.1325e-02, PNorm = 155.0391, GNorm = 0.4027, lr_0 = 4.6989e-04
Loss = 1.6575e-02, PNorm = 155.0633, GNorm = 0.3039, lr_0 = 4.6957e-04
Loss = 1.3913e-02, PNorm = 155.0903, GNorm = 0.2887, lr_0 = 4.6924e-04
Loss = 1.6536e-02, PNorm = 155.1165, GNorm = 0.1974, lr_0 = 4.6892e-04
Loss = 1.0235e-02, PNorm = 155.1423, GNorm = 0.2252, lr_0 = 4.6860e-04
Loss = 1.2770e-02, PNorm = 155.1605, GNorm = 0.2069, lr_0 = 4.6828e-04
Loss = 1.1706e-02, PNorm = 155.1829, GNorm = 0.4516, lr_0 = 4.6796e-04
Loss = 1.1005e-02, PNorm = 155.2068, GNorm = 0.2415, lr_0 = 4.6764e-04
Loss = 1.1286e-02, PNorm = 155.2310, GNorm = 0.2259, lr_0 = 4.6732e-04
Loss = 1.2682e-02, PNorm = 155.2551, GNorm = 0.9071, lr_0 = 4.6700e-04
Loss = 1.3495e-02, PNorm = 155.2792, GNorm = 0.4745, lr_0 = 4.6668e-04
Loss = 1.1779e-02, PNorm = 155.3054, GNorm = 0.2559, lr_0 = 4.6636e-04
Loss = 1.1676e-02, PNorm = 155.3304, GNorm = 0.5948, lr_0 = 4.6604e-04
Loss = 1.1955e-02, PNorm = 155.3560, GNorm = 0.4307, lr_0 = 4.6572e-04
Loss = 1.2440e-02, PNorm = 155.3809, GNorm = 0.2856, lr_0 = 4.6540e-04
Loss = 1.2970e-02, PNorm = 155.4086, GNorm = 0.9852, lr_0 = 4.6508e-04
Loss = 1.2246e-02, PNorm = 155.4317, GNorm = 0.3008, lr_0 = 4.6476e-04
Loss = 1.1354e-02, PNorm = 155.4522, GNorm = 0.3296, lr_0 = 4.6445e-04
Loss = 1.2723e-02, PNorm = 155.4711, GNorm = 0.2343, lr_0 = 4.6413e-04
Loss = 1.2266e-02, PNorm = 155.4973, GNorm = 0.3471, lr_0 = 4.6381e-04
Loss = 1.1244e-02, PNorm = 155.5240, GNorm = 1.0392, lr_0 = 4.6349e-04
Loss = 1.4048e-02, PNorm = 155.5551, GNorm = 0.1788, lr_0 = 4.6317e-04
Loss = 1.0432e-02, PNorm = 155.5805, GNorm = 0.1701, lr_0 = 4.6286e-04
Loss = 1.2429e-02, PNorm = 155.6058, GNorm = 0.1877, lr_0 = 4.6254e-04
Loss = 1.1014e-02, PNorm = 155.6327, GNorm = 0.4909, lr_0 = 4.6222e-04
Loss = 1.0268e-02, PNorm = 155.6583, GNorm = 0.4156, lr_0 = 4.6191e-04
Loss = 1.1693e-02, PNorm = 155.6818, GNorm = 0.1389, lr_0 = 4.6159e-04
Loss = 1.0745e-02, PNorm = 155.7087, GNorm = 0.1320, lr_0 = 4.6127e-04
Loss = 1.2345e-02, PNorm = 155.7284, GNorm = 0.3772, lr_0 = 4.6096e-04
Loss = 1.0229e-02, PNorm = 155.7533, GNorm = 0.1268, lr_0 = 4.6064e-04
Loss = 1.1898e-02, PNorm = 155.7796, GNorm = 0.3999, lr_0 = 4.6033e-04
Loss = 1.2949e-02, PNorm = 155.8074, GNorm = 0.1426, lr_0 = 4.6001e-04
Loss = 1.1819e-02, PNorm = 155.8339, GNorm = 0.3572, lr_0 = 4.5970e-04
Loss = 1.0807e-02, PNorm = 155.8600, GNorm = 0.2506, lr_0 = 4.5938e-04
Loss = 1.1679e-02, PNorm = 155.8871, GNorm = 0.1649, lr_0 = 4.5907e-04
Loss = 9.8398e-03, PNorm = 155.9125, GNorm = 0.3455, lr_0 = 4.5875e-04
Loss = 1.0166e-02, PNorm = 155.9378, GNorm = 0.5077, lr_0 = 4.5844e-04
Loss = 1.2922e-02, PNorm = 155.9621, GNorm = 0.3614, lr_0 = 4.5812e-04
Loss = 1.1026e-02, PNorm = 155.9906, GNorm = 0.2358, lr_0 = 4.5781e-04
Loss = 1.1097e-02, PNorm = 156.0191, GNorm = 0.1781, lr_0 = 4.5750e-04
Loss = 1.3750e-02, PNorm = 156.0472, GNorm = 0.2525, lr_0 = 4.5718e-04
Loss = 1.1623e-02, PNorm = 156.0724, GNorm = 0.2790, lr_0 = 4.5687e-04
Loss = 1.0948e-02, PNorm = 156.0958, GNorm = 0.3662, lr_0 = 4.5656e-04
Loss = 1.2659e-02, PNorm = 156.1191, GNorm = 0.3591, lr_0 = 4.5624e-04
Loss = 1.3939e-02, PNorm = 156.1437, GNorm = 0.6193, lr_0 = 4.5593e-04
Loss = 1.2443e-02, PNorm = 156.1723, GNorm = 0.2852, lr_0 = 4.5562e-04
Loss = 1.3406e-02, PNorm = 156.1961, GNorm = 0.4539, lr_0 = 4.5531e-04
Loss = 1.1253e-02, PNorm = 156.2207, GNorm = 0.3214, lr_0 = 4.5499e-04
Loss = 1.1120e-02, PNorm = 156.2443, GNorm = 0.3423, lr_0 = 4.5468e-04
Loss = 1.1652e-02, PNorm = 156.2729, GNorm = 0.3311, lr_0 = 4.5437e-04
Loss = 1.1916e-02, PNorm = 156.2997, GNorm = 0.1892, lr_0 = 4.5406e-04
Loss = 1.1713e-02, PNorm = 156.3224, GNorm = 0.1800, lr_0 = 4.5375e-04
Loss = 1.0562e-02, PNorm = 156.3500, GNorm = 0.3274, lr_0 = 4.5344e-04
Loss = 1.1370e-02, PNorm = 156.3798, GNorm = 0.2286, lr_0 = 4.5313e-04
Loss = 1.3073e-02, PNorm = 156.4086, GNorm = 0.1773, lr_0 = 4.5282e-04
Loss = 1.2781e-02, PNorm = 156.4370, GNorm = 0.6602, lr_0 = 4.5251e-04
Loss = 1.2652e-02, PNorm = 156.4630, GNorm = 0.3645, lr_0 = 4.5220e-04
Loss = 1.5175e-02, PNorm = 156.4891, GNorm = 0.2684, lr_0 = 4.5189e-04
Loss = 1.1311e-02, PNorm = 156.5121, GNorm = 0.2423, lr_0 = 4.5158e-04
Loss = 1.0232e-02, PNorm = 156.5366, GNorm = 0.6450, lr_0 = 4.5127e-04
Loss = 1.0203e-02, PNorm = 156.5655, GNorm = 0.3018, lr_0 = 4.5096e-04
Loss = 1.3320e-02, PNorm = 156.5946, GNorm = 0.3894, lr_0 = 4.5065e-04
Loss = 1.1548e-02, PNorm = 156.6196, GNorm = 0.3433, lr_0 = 4.5034e-04
Loss = 1.3065e-02, PNorm = 156.6446, GNorm = 0.2396, lr_0 = 4.5003e-04
Loss = 1.4230e-02, PNorm = 156.6732, GNorm = 0.3517, lr_0 = 4.4972e-04
Loss = 1.2832e-02, PNorm = 156.7067, GNorm = 0.2025, lr_0 = 4.4942e-04
Loss = 1.0875e-02, PNorm = 156.7375, GNorm = 0.4291, lr_0 = 4.4911e-04
Loss = 1.1771e-02, PNorm = 156.7635, GNorm = 0.2198, lr_0 = 4.4880e-04
Loss = 9.9550e-03, PNorm = 156.7883, GNorm = 0.2765, lr_0 = 4.4849e-04
Loss = 1.5545e-02, PNorm = 156.8138, GNorm = 0.6417, lr_0 = 4.4819e-04
Loss = 1.1673e-02, PNorm = 156.8389, GNorm = 0.6551, lr_0 = 4.4788e-04
Loss = 1.2226e-02, PNorm = 156.8686, GNorm = 0.5804, lr_0 = 4.4757e-04
Loss = 1.1866e-02, PNorm = 156.8962, GNorm = 0.3835, lr_0 = 4.4727e-04
Loss = 1.3968e-02, PNorm = 156.9243, GNorm = 0.3243, lr_0 = 4.4696e-04
Loss = 9.0482e-03, PNorm = 156.9487, GNorm = 0.1253, lr_0 = 4.4665e-04
Loss = 1.0243e-02, PNorm = 156.9737, GNorm = 0.1617, lr_0 = 4.4635e-04
Loss = 1.0349e-02, PNorm = 156.9951, GNorm = 0.2331, lr_0 = 4.4604e-04
Loss = 1.1172e-02, PNorm = 157.0176, GNorm = 0.1863, lr_0 = 4.4574e-04
Loss = 9.8796e-03, PNorm = 157.0443, GNorm = 0.2641, lr_0 = 4.4543e-04
Loss = 1.2459e-02, PNorm = 157.0728, GNorm = 0.2150, lr_0 = 4.4513e-04
Loss = 1.2573e-02, PNorm = 157.1014, GNorm = 0.4808, lr_0 = 4.4482e-04
Loss = 1.3280e-02, PNorm = 157.1315, GNorm = 0.2003, lr_0 = 4.4452e-04
Loss = 1.3901e-02, PNorm = 157.1586, GNorm = 0.5956, lr_0 = 4.4421e-04
Loss = 1.2543e-02, PNorm = 157.1866, GNorm = 0.4218, lr_0 = 4.4391e-04
Loss = 1.1919e-02, PNorm = 157.2189, GNorm = 0.3724, lr_0 = 4.4360e-04
Loss = 1.4670e-02, PNorm = 157.2508, GNorm = 0.4527, lr_0 = 4.4330e-04
Loss = 1.1289e-02, PNorm = 157.2812, GNorm = 0.5672, lr_0 = 4.4299e-04
Loss = 1.1190e-02, PNorm = 157.3098, GNorm = 0.1788, lr_0 = 4.4269e-04
Loss = 1.2295e-02, PNorm = 157.3407, GNorm = 0.4721, lr_0 = 4.4239e-04
Loss = 9.8524e-03, PNorm = 157.3670, GNorm = 0.1737, lr_0 = 4.4209e-04
Loss = 1.3628e-02, PNorm = 157.3913, GNorm = 0.5656, lr_0 = 4.4178e-04
Loss = 1.2747e-02, PNorm = 157.4211, GNorm = 0.5425, lr_0 = 4.4148e-04
Loss = 1.2271e-02, PNorm = 157.4513, GNorm = 0.2101, lr_0 = 4.4118e-04
Loss = 1.1313e-02, PNorm = 157.4786, GNorm = 0.2682, lr_0 = 4.4088e-04
Loss = 1.0761e-02, PNorm = 157.5037, GNorm = 0.3767, lr_0 = 4.4057e-04
Loss = 1.1104e-02, PNorm = 157.5323, GNorm = 0.3873, lr_0 = 4.4027e-04
Loss = 1.1773e-02, PNorm = 157.5570, GNorm = 0.2610, lr_0 = 4.3997e-04
Loss = 1.4074e-02, PNorm = 157.5883, GNorm = 0.1464, lr_0 = 4.3967e-04
Loss = 1.1866e-02, PNorm = 157.6159, GNorm = 0.2747, lr_0 = 4.3937e-04
Validation mae = 0.281509
Epoch 12
Loss = 1.4615e-02, PNorm = 157.6365, GNorm = 0.3350, lr_0 = 4.3907e-04
Loss = 1.2326e-02, PNorm = 157.6559, GNorm = 0.5039, lr_0 = 4.3877e-04
Loss = 1.0579e-02, PNorm = 157.6785, GNorm = 0.7222, lr_0 = 4.3846e-04
Loss = 9.9098e-03, PNorm = 157.7022, GNorm = 0.5106, lr_0 = 4.3816e-04
Loss = 1.4507e-02, PNorm = 157.7214, GNorm = 0.3796, lr_0 = 4.3786e-04
Loss = 1.2243e-02, PNorm = 157.7419, GNorm = 0.1840, lr_0 = 4.3756e-04
Loss = 1.2059e-02, PNorm = 157.7623, GNorm = 0.5921, lr_0 = 4.3726e-04
Loss = 1.1477e-02, PNorm = 157.7818, GNorm = 0.2536, lr_0 = 4.3696e-04
Loss = 1.1487e-02, PNorm = 157.7980, GNorm = 0.2211, lr_0 = 4.3667e-04
Loss = 9.9621e-03, PNorm = 157.8148, GNorm = 0.5835, lr_0 = 4.3637e-04
Loss = 1.1002e-02, PNorm = 157.8348, GNorm = 0.1487, lr_0 = 4.3607e-04
Loss = 1.2053e-02, PNorm = 157.8544, GNorm = 0.3586, lr_0 = 4.3577e-04
Loss = 1.0948e-02, PNorm = 157.8756, GNorm = 0.1918, lr_0 = 4.3547e-04
Loss = 1.1210e-02, PNorm = 157.8942, GNorm = 0.2615, lr_0 = 4.3517e-04
Loss = 1.0226e-02, PNorm = 157.9149, GNorm = 0.2099, lr_0 = 4.3487e-04
Loss = 1.0634e-02, PNorm = 157.9320, GNorm = 0.1348, lr_0 = 4.3458e-04
Loss = 9.9131e-03, PNorm = 157.9499, GNorm = 0.3773, lr_0 = 4.3428e-04
Loss = 1.4511e-02, PNorm = 157.9740, GNorm = 0.4352, lr_0 = 4.3398e-04
Loss = 1.0724e-02, PNorm = 158.0005, GNorm = 0.2026, lr_0 = 4.3368e-04
Loss = 9.4194e-03, PNorm = 158.0227, GNorm = 0.3817, lr_0 = 4.3339e-04
Loss = 1.0124e-02, PNorm = 158.0428, GNorm = 0.2371, lr_0 = 4.3309e-04
Loss = 1.1153e-02, PNorm = 158.0616, GNorm = 0.1548, lr_0 = 4.3279e-04
Loss = 9.2823e-03, PNorm = 158.0800, GNorm = 0.5904, lr_0 = 4.3250e-04
Loss = 9.6292e-03, PNorm = 158.0980, GNorm = 0.0892, lr_0 = 4.3220e-04
Loss = 1.1788e-02, PNorm = 158.1178, GNorm = 0.1167, lr_0 = 4.3190e-04
Loss = 1.1065e-02, PNorm = 158.1370, GNorm = 0.6056, lr_0 = 4.3161e-04
Loss = 9.7304e-03, PNorm = 158.1606, GNorm = 0.1480, lr_0 = 4.3131e-04
Loss = 1.0251e-02, PNorm = 158.1804, GNorm = 0.1800, lr_0 = 4.3102e-04
Loss = 9.6927e-03, PNorm = 158.2024, GNorm = 0.2509, lr_0 = 4.3072e-04
Loss = 1.0931e-02, PNorm = 158.2202, GNorm = 0.5061, lr_0 = 4.3043e-04
Loss = 8.9080e-03, PNorm = 158.2393, GNorm = 0.1073, lr_0 = 4.3013e-04
Loss = 1.4789e-02, PNorm = 158.2567, GNorm = 0.4738, lr_0 = 4.2984e-04
Loss = 1.3384e-02, PNorm = 158.2760, GNorm = 0.2815, lr_0 = 4.2954e-04
Loss = 1.2432e-02, PNorm = 158.3002, GNorm = 0.4845, lr_0 = 4.2925e-04
Loss = 9.8357e-03, PNorm = 158.3251, GNorm = 0.2298, lr_0 = 4.2895e-04
Loss = 1.0100e-02, PNorm = 158.3469, GNorm = 0.2909, lr_0 = 4.2866e-04
Loss = 1.0082e-02, PNorm = 158.3693, GNorm = 0.5003, lr_0 = 4.2837e-04
Loss = 8.5979e-03, PNorm = 158.3909, GNorm = 0.2360, lr_0 = 4.2807e-04
Loss = 1.0627e-02, PNorm = 158.4136, GNorm = 0.1664, lr_0 = 4.2778e-04
Loss = 1.0281e-02, PNorm = 158.4353, GNorm = 0.2448, lr_0 = 4.2749e-04
Loss = 7.7029e-03, PNorm = 158.4538, GNorm = 0.2138, lr_0 = 4.2719e-04
Loss = 9.0551e-03, PNorm = 158.4750, GNorm = 0.3543, lr_0 = 4.2690e-04
Loss = 1.2179e-02, PNorm = 158.4956, GNorm = 0.3151, lr_0 = 4.2661e-04
Loss = 9.5881e-03, PNorm = 158.5185, GNorm = 0.2256, lr_0 = 4.2632e-04
Loss = 9.3007e-03, PNorm = 158.5367, GNorm = 0.4872, lr_0 = 4.2602e-04
Loss = 1.0168e-02, PNorm = 158.5554, GNorm = 0.1595, lr_0 = 4.2573e-04
Loss = 1.2492e-02, PNorm = 158.5760, GNorm = 0.3047, lr_0 = 4.2544e-04
Loss = 1.1876e-02, PNorm = 158.5944, GNorm = 0.4166, lr_0 = 4.2515e-04
Loss = 9.5883e-03, PNorm = 158.6114, GNorm = 0.1454, lr_0 = 4.2486e-04
Loss = 8.8988e-03, PNorm = 158.6328, GNorm = 0.1372, lr_0 = 4.2457e-04
Loss = 9.8116e-03, PNorm = 158.6551, GNorm = 0.2760, lr_0 = 4.2428e-04
Loss = 8.7681e-03, PNorm = 158.6771, GNorm = 0.1640, lr_0 = 4.2399e-04
Loss = 1.0112e-02, PNorm = 158.6957, GNorm = 0.1254, lr_0 = 4.2370e-04
Loss = 8.9804e-03, PNorm = 158.7190, GNorm = 0.3966, lr_0 = 4.2340e-04
Loss = 1.0555e-02, PNorm = 158.7399, GNorm = 0.2121, lr_0 = 4.2311e-04
Loss = 1.0758e-02, PNorm = 158.7617, GNorm = 0.4173, lr_0 = 4.2283e-04
Loss = 1.1405e-02, PNorm = 158.7858, GNorm = 0.5580, lr_0 = 4.2254e-04
Loss = 1.0800e-02, PNorm = 158.8111, GNorm = 0.4851, lr_0 = 4.2225e-04
Loss = 8.4995e-03, PNorm = 158.8354, GNorm = 0.1987, lr_0 = 4.2196e-04
Loss = 9.1575e-03, PNorm = 158.8629, GNorm = 0.3352, lr_0 = 4.2167e-04
Loss = 1.6285e-02, PNorm = 158.8835, GNorm = 0.2770, lr_0 = 4.2138e-04
Loss = 8.7694e-03, PNorm = 158.9030, GNorm = 0.1684, lr_0 = 4.2109e-04
Loss = 1.1268e-02, PNorm = 158.9244, GNorm = 0.8605, lr_0 = 4.2080e-04
Loss = 1.1556e-02, PNorm = 158.9461, GNorm = 0.3038, lr_0 = 4.2051e-04
Loss = 8.7347e-03, PNorm = 158.9668, GNorm = 0.1903, lr_0 = 4.2023e-04
Loss = 1.0372e-02, PNorm = 158.9858, GNorm = 0.5529, lr_0 = 4.1994e-04
Loss = 1.0903e-02, PNorm = 159.0055, GNorm = 0.1660, lr_0 = 4.1965e-04
Loss = 1.2739e-02, PNorm = 159.0298, GNorm = 0.1512, lr_0 = 4.1936e-04
Loss = 9.7398e-03, PNorm = 159.0537, GNorm = 0.1913, lr_0 = 4.1907e-04
Loss = 9.1083e-03, PNorm = 159.0732, GNorm = 0.4043, lr_0 = 4.1879e-04
Loss = 9.5083e-03, PNorm = 159.0951, GNorm = 0.5051, lr_0 = 4.1850e-04
Loss = 9.8388e-03, PNorm = 159.1148, GNorm = 0.3032, lr_0 = 4.1821e-04
Loss = 9.5022e-03, PNorm = 159.1371, GNorm = 0.3044, lr_0 = 4.1793e-04
Loss = 1.0349e-02, PNorm = 159.1569, GNorm = 0.3443, lr_0 = 4.1764e-04
Loss = 1.0514e-02, PNorm = 159.1786, GNorm = 0.1342, lr_0 = 4.1736e-04
Loss = 1.1291e-02, PNorm = 159.2014, GNorm = 0.2748, lr_0 = 4.1707e-04
Loss = 9.3154e-03, PNorm = 159.2272, GNorm = 0.5769, lr_0 = 4.1678e-04
Loss = 1.1646e-02, PNorm = 159.2532, GNorm = 0.1513, lr_0 = 4.1650e-04
Loss = 8.8500e-03, PNorm = 159.2815, GNorm = 0.3847, lr_0 = 4.1621e-04
Loss = 1.0142e-02, PNorm = 159.3042, GNorm = 1.0562, lr_0 = 4.1593e-04
Loss = 1.0525e-02, PNorm = 159.3281, GNorm = 0.1721, lr_0 = 4.1564e-04
Loss = 1.0382e-02, PNorm = 159.3495, GNorm = 0.3893, lr_0 = 4.1536e-04
Loss = 1.0174e-02, PNorm = 159.3691, GNorm = 0.3165, lr_0 = 4.1507e-04
Loss = 9.0745e-03, PNorm = 159.3900, GNorm = 0.3818, lr_0 = 4.1479e-04
Loss = 1.1141e-02, PNorm = 159.4117, GNorm = 0.1634, lr_0 = 4.1450e-04
Loss = 1.2297e-02, PNorm = 159.4310, GNorm = 0.6157, lr_0 = 4.1422e-04
Loss = 1.0010e-02, PNorm = 159.4522, GNorm = 0.4346, lr_0 = 4.1394e-04
Loss = 1.0165e-02, PNorm = 159.4777, GNorm = 0.6258, lr_0 = 4.1365e-04
Loss = 1.0629e-02, PNorm = 159.4989, GNorm = 0.2552, lr_0 = 4.1337e-04
Loss = 1.1119e-02, PNorm = 159.5210, GNorm = 0.3139, lr_0 = 4.1309e-04
Loss = 9.5964e-03, PNorm = 159.5438, GNorm = 0.2067, lr_0 = 4.1280e-04
Loss = 1.0761e-02, PNorm = 159.5675, GNorm = 0.3161, lr_0 = 4.1252e-04
Loss = 8.7853e-03, PNorm = 159.5897, GNorm = 0.1444, lr_0 = 4.1224e-04
Loss = 1.0250e-02, PNorm = 159.6112, GNorm = 0.4202, lr_0 = 4.1196e-04
Loss = 9.5053e-03, PNorm = 159.6326, GNorm = 0.2158, lr_0 = 4.1167e-04
Loss = 9.0995e-03, PNorm = 159.6535, GNorm = 0.2059, lr_0 = 4.1139e-04
Loss = 9.2138e-03, PNorm = 159.6742, GNorm = 0.4832, lr_0 = 4.1111e-04
Loss = 1.1230e-02, PNorm = 159.6972, GNorm = 0.4335, lr_0 = 4.1083e-04
Loss = 9.3551e-03, PNorm = 159.7228, GNorm = 0.1732, lr_0 = 4.1055e-04
Loss = 9.8433e-03, PNorm = 159.7519, GNorm = 0.2183, lr_0 = 4.1027e-04
Loss = 1.0822e-02, PNorm = 159.7779, GNorm = 0.6504, lr_0 = 4.0998e-04
Loss = 9.6078e-03, PNorm = 159.7993, GNorm = 0.2100, lr_0 = 4.0970e-04
Loss = 8.7692e-03, PNorm = 159.8245, GNorm = 0.1147, lr_0 = 4.0942e-04
Loss = 9.8231e-03, PNorm = 159.8477, GNorm = 0.7677, lr_0 = 4.0914e-04
Loss = 9.5728e-03, PNorm = 159.8660, GNorm = 0.1507, lr_0 = 4.0886e-04
Loss = 1.2442e-02, PNorm = 159.8912, GNorm = 0.2419, lr_0 = 4.0858e-04
Loss = 1.0141e-02, PNorm = 159.9183, GNorm = 0.2874, lr_0 = 4.0830e-04
Loss = 8.3645e-03, PNorm = 159.9431, GNorm = 0.1579, lr_0 = 4.0802e-04
Loss = 7.7525e-03, PNorm = 159.9664, GNorm = 0.3642, lr_0 = 4.0774e-04
Loss = 8.6980e-03, PNorm = 159.9871, GNorm = 0.4469, lr_0 = 4.0746e-04
Loss = 1.4463e-02, PNorm = 160.0074, GNorm = 0.4714, lr_0 = 4.0718e-04
Loss = 9.1093e-03, PNorm = 160.0326, GNorm = 0.0954, lr_0 = 4.0691e-04
Loss = 8.4163e-03, PNorm = 160.0567, GNorm = 0.2885, lr_0 = 4.0663e-04
Loss = 9.1584e-03, PNorm = 160.0775, GNorm = 0.3253, lr_0 = 4.0635e-04
Loss = 1.0287e-02, PNorm = 160.0987, GNorm = 0.3813, lr_0 = 4.0607e-04
Loss = 1.2204e-02, PNorm = 160.1238, GNorm = 0.1538, lr_0 = 4.0579e-04
Loss = 8.6087e-03, PNorm = 160.1445, GNorm = 0.1874, lr_0 = 4.0551e-04
Loss = 8.7290e-03, PNorm = 160.1677, GNorm = 0.2392, lr_0 = 4.0524e-04
Loss = 1.0511e-02, PNorm = 160.1895, GNorm = 0.1887, lr_0 = 4.0496e-04
Loss = 1.4700e-02, PNorm = 160.2111, GNorm = 0.1652, lr_0 = 4.0468e-04
Validation mae = 0.281023
Epoch 13
Loss = 9.4910e-03, PNorm = 160.2300, GNorm = 0.1891, lr_0 = 4.0440e-04
Loss = 9.6372e-03, PNorm = 160.2474, GNorm = 0.8108, lr_0 = 4.0413e-04
Loss = 9.4931e-03, PNorm = 160.2612, GNorm = 0.3944, lr_0 = 4.0385e-04
Loss = 9.7894e-03, PNorm = 160.2779, GNorm = 0.2044, lr_0 = 4.0357e-04
Loss = 8.3325e-03, PNorm = 160.2965, GNorm = 0.1099, lr_0 = 4.0330e-04
Loss = 1.0609e-02, PNorm = 160.3128, GNorm = 0.2636, lr_0 = 4.0302e-04
Loss = 1.0690e-02, PNorm = 160.3295, GNorm = 0.4529, lr_0 = 4.0274e-04
Loss = 1.0266e-02, PNorm = 160.3435, GNorm = 0.3518, lr_0 = 4.0247e-04
Loss = 9.6722e-03, PNorm = 160.3609, GNorm = 0.4058, lr_0 = 4.0219e-04
Loss = 9.3983e-03, PNorm = 160.3743, GNorm = 0.3370, lr_0 = 4.0192e-04
Loss = 8.9365e-03, PNorm = 160.3902, GNorm = 0.2888, lr_0 = 4.0164e-04
Loss = 1.2458e-02, PNorm = 160.4064, GNorm = 0.3397, lr_0 = 4.0137e-04
Loss = 8.8171e-03, PNorm = 160.4239, GNorm = 0.2090, lr_0 = 4.0109e-04
Loss = 8.0066e-03, PNorm = 160.4421, GNorm = 0.2253, lr_0 = 4.0082e-04
Loss = 1.0151e-02, PNorm = 160.4607, GNorm = 0.3896, lr_0 = 4.0054e-04
Loss = 8.5926e-03, PNorm = 160.4771, GNorm = 0.1169, lr_0 = 4.0027e-04
Loss = 9.2767e-03, PNorm = 160.4965, GNorm = 0.1664, lr_0 = 3.9999e-04
Loss = 6.7155e-03, PNorm = 160.5131, GNorm = 0.1719, lr_0 = 3.9972e-04
Loss = 7.3933e-03, PNorm = 160.5255, GNorm = 0.1756, lr_0 = 3.9945e-04
Loss = 8.6422e-03, PNorm = 160.5393, GNorm = 0.1311, lr_0 = 3.9917e-04
Loss = 7.8765e-03, PNorm = 160.5526, GNorm = 0.1001, lr_0 = 3.9890e-04
Loss = 9.2243e-03, PNorm = 160.5692, GNorm = 0.3208, lr_0 = 3.9863e-04
Loss = 8.2603e-03, PNorm = 160.5881, GNorm = 0.3047, lr_0 = 3.9835e-04
Loss = 8.4650e-03, PNorm = 160.6028, GNorm = 0.3791, lr_0 = 3.9808e-04
Loss = 7.4569e-03, PNorm = 160.6187, GNorm = 0.2328, lr_0 = 3.9781e-04
Loss = 8.7497e-03, PNorm = 160.6328, GNorm = 0.0855, lr_0 = 3.9753e-04
Loss = 6.4915e-03, PNorm = 160.6486, GNorm = 0.3163, lr_0 = 3.9726e-04
Loss = 9.5332e-03, PNorm = 160.6640, GNorm = 0.2784, lr_0 = 3.9699e-04
Loss = 7.3478e-03, PNorm = 160.6811, GNorm = 0.1102, lr_0 = 3.9672e-04
Loss = 8.2768e-03, PNorm = 160.6981, GNorm = 0.1597, lr_0 = 3.9645e-04
Loss = 9.0189e-03, PNorm = 160.7132, GNorm = 0.3712, lr_0 = 3.9617e-04
Loss = 9.5287e-03, PNorm = 160.7315, GNorm = 0.2708, lr_0 = 3.9590e-04
Loss = 1.0366e-02, PNorm = 160.7458, GNorm = 0.1212, lr_0 = 3.9563e-04
Loss = 7.7702e-03, PNorm = 160.7628, GNorm = 0.1467, lr_0 = 3.9536e-04
Loss = 1.1241e-02, PNorm = 160.7801, GNorm = 0.1167, lr_0 = 3.9509e-04
Loss = 7.3306e-03, PNorm = 160.7974, GNorm = 0.3061, lr_0 = 3.9482e-04
Loss = 6.7128e-03, PNorm = 160.8162, GNorm = 0.1450, lr_0 = 3.9455e-04
Loss = 7.0675e-03, PNorm = 160.8346, GNorm = 0.1775, lr_0 = 3.9428e-04
Loss = 7.4601e-03, PNorm = 160.8488, GNorm = 0.2010, lr_0 = 3.9401e-04
Loss = 7.7323e-03, PNorm = 160.8646, GNorm = 0.1827, lr_0 = 3.9374e-04
Loss = 9.9285e-03, PNorm = 160.8830, GNorm = 0.0955, lr_0 = 3.9347e-04
Loss = 8.0910e-03, PNorm = 160.9032, GNorm = 0.1257, lr_0 = 3.9320e-04
Loss = 7.9585e-03, PNorm = 160.9211, GNorm = 0.4774, lr_0 = 3.9293e-04
Loss = 7.7516e-03, PNorm = 160.9388, GNorm = 0.4255, lr_0 = 3.9266e-04
Loss = 7.3608e-03, PNorm = 160.9604, GNorm = 0.1554, lr_0 = 3.9239e-04
Loss = 7.5625e-03, PNorm = 160.9822, GNorm = 0.2325, lr_0 = 3.9212e-04
Loss = 8.0587e-03, PNorm = 161.0018, GNorm = 0.1556, lr_0 = 3.9185e-04
Loss = 7.4022e-03, PNorm = 161.0224, GNorm = 0.1514, lr_0 = 3.9159e-04
Loss = 7.5562e-03, PNorm = 161.0395, GNorm = 0.4799, lr_0 = 3.9132e-04
Loss = 1.0137e-02, PNorm = 161.0583, GNorm = 0.3653, lr_0 = 3.9105e-04
Loss = 7.3905e-03, PNorm = 161.0761, GNorm = 0.0878, lr_0 = 3.9078e-04
Loss = 9.3026e-03, PNorm = 161.0963, GNorm = 0.3565, lr_0 = 3.9051e-04
Loss = 9.8944e-03, PNorm = 161.1188, GNorm = 0.2575, lr_0 = 3.9025e-04
Loss = 7.9881e-03, PNorm = 161.1388, GNorm = 0.0842, lr_0 = 3.8998e-04
Loss = 8.7404e-03, PNorm = 161.1558, GNorm = 0.2733, lr_0 = 3.8971e-04
Loss = 9.9881e-03, PNorm = 161.1748, GNorm = 0.3030, lr_0 = 3.8945e-04
Loss = 7.1063e-03, PNorm = 161.1975, GNorm = 0.1765, lr_0 = 3.8918e-04
Loss = 8.2862e-03, PNorm = 161.2162, GNorm = 0.1854, lr_0 = 3.8891e-04
Loss = 8.7545e-03, PNorm = 161.2310, GNorm = 0.1239, lr_0 = 3.8865e-04
Loss = 7.7334e-03, PNorm = 161.2477, GNorm = 0.2038, lr_0 = 3.8838e-04
Loss = 8.9888e-03, PNorm = 161.2625, GNorm = 0.3458, lr_0 = 3.8811e-04
Loss = 7.1457e-03, PNorm = 161.2795, GNorm = 0.2652, lr_0 = 3.8785e-04
Loss = 8.3148e-03, PNorm = 161.2976, GNorm = 0.2754, lr_0 = 3.8758e-04
Loss = 8.0404e-03, PNorm = 161.3152, GNorm = 0.5024, lr_0 = 3.8732e-04
Loss = 7.4814e-03, PNorm = 161.3365, GNorm = 0.3616, lr_0 = 3.8705e-04
Loss = 7.8006e-03, PNorm = 161.3558, GNorm = 0.1528, lr_0 = 3.8679e-04
Loss = 9.5845e-03, PNorm = 161.3748, GNorm = 0.2373, lr_0 = 3.8652e-04
Loss = 1.2958e-02, PNorm = 161.3937, GNorm = 0.3827, lr_0 = 3.8626e-04
Loss = 7.4296e-03, PNorm = 161.4092, GNorm = 0.3108, lr_0 = 3.8599e-04
Loss = 9.9706e-03, PNorm = 161.4281, GNorm = 0.0933, lr_0 = 3.8573e-04
Loss = 8.6013e-03, PNorm = 161.4460, GNorm = 0.1503, lr_0 = 3.8546e-04
Loss = 8.0748e-03, PNorm = 161.4651, GNorm = 0.2142, lr_0 = 3.8520e-04
Loss = 1.1285e-02, PNorm = 161.4823, GNorm = 0.1066, lr_0 = 3.8493e-04
Loss = 8.9211e-03, PNorm = 161.4990, GNorm = 0.2842, lr_0 = 3.8467e-04
Loss = 9.0034e-03, PNorm = 161.5186, GNorm = 0.2219, lr_0 = 3.8441e-04
Loss = 8.6331e-03, PNorm = 161.5356, GNorm = 0.3540, lr_0 = 3.8414e-04
Loss = 9.3012e-03, PNorm = 161.5530, GNorm = 0.1215, lr_0 = 3.8388e-04
Loss = 7.9000e-03, PNorm = 161.5711, GNorm = 0.3427, lr_0 = 3.8362e-04
Loss = 7.0908e-03, PNorm = 161.5924, GNorm = 0.3399, lr_0 = 3.8336e-04
Loss = 7.3518e-03, PNorm = 161.6129, GNorm = 0.1131, lr_0 = 3.8309e-04
Loss = 8.8100e-03, PNorm = 161.6316, GNorm = 0.1318, lr_0 = 3.8283e-04
Loss = 7.7020e-03, PNorm = 161.6465, GNorm = 0.1466, lr_0 = 3.8257e-04
Loss = 9.1582e-03, PNorm = 161.6629, GNorm = 0.2780, lr_0 = 3.8231e-04
Loss = 7.2728e-03, PNorm = 161.6801, GNorm = 0.3013, lr_0 = 3.8204e-04
Loss = 7.1664e-03, PNorm = 161.6952, GNorm = 0.3127, lr_0 = 3.8178e-04
Loss = 7.2767e-03, PNorm = 161.7128, GNorm = 0.0952, lr_0 = 3.8152e-04
Loss = 9.0463e-03, PNorm = 161.7321, GNorm = 0.2093, lr_0 = 3.8126e-04
Loss = 7.8220e-03, PNorm = 161.7546, GNorm = 0.4064, lr_0 = 3.8100e-04
Loss = 8.2139e-03, PNorm = 161.7733, GNorm = 0.3940, lr_0 = 3.8074e-04
Loss = 1.0629e-02, PNorm = 161.7913, GNorm = 0.1875, lr_0 = 3.8048e-04
Loss = 9.2887e-03, PNorm = 161.8124, GNorm = 0.4615, lr_0 = 3.8022e-04
Loss = 7.8137e-03, PNorm = 161.8342, GNorm = 0.3594, lr_0 = 3.7995e-04
Loss = 1.1572e-02, PNorm = 161.8509, GNorm = 0.3097, lr_0 = 3.7969e-04
Loss = 7.1587e-03, PNorm = 161.8683, GNorm = 0.2896, lr_0 = 3.7943e-04
Loss = 7.8372e-03, PNorm = 161.8888, GNorm = 0.1942, lr_0 = 3.7917e-04
Loss = 7.4797e-03, PNorm = 161.9091, GNorm = 0.2334, lr_0 = 3.7891e-04
Loss = 1.0715e-02, PNorm = 161.9285, GNorm = 0.5281, lr_0 = 3.7866e-04
Loss = 8.3961e-03, PNorm = 161.9465, GNorm = 0.1554, lr_0 = 3.7840e-04
Loss = 8.2318e-03, PNorm = 161.9681, GNorm = 0.2296, lr_0 = 3.7814e-04
Loss = 7.0358e-03, PNorm = 161.9863, GNorm = 0.1793, lr_0 = 3.7788e-04
Loss = 7.4873e-03, PNorm = 162.0046, GNorm = 0.5100, lr_0 = 3.7762e-04
Loss = 8.3754e-03, PNorm = 162.0207, GNorm = 0.5173, lr_0 = 3.7736e-04
Loss = 7.8196e-03, PNorm = 162.0456, GNorm = 0.1685, lr_0 = 3.7710e-04
Loss = 7.8120e-03, PNorm = 162.0630, GNorm = 0.1662, lr_0 = 3.7684e-04
Loss = 8.0523e-03, PNorm = 162.0784, GNorm = 0.1736, lr_0 = 3.7659e-04
Loss = 9.0115e-03, PNorm = 162.0982, GNorm = 0.3948, lr_0 = 3.7633e-04
Loss = 1.0084e-02, PNorm = 162.1153, GNorm = 0.1575, lr_0 = 3.7607e-04
Loss = 8.1365e-03, PNorm = 162.1338, GNorm = 0.2579, lr_0 = 3.7581e-04
Loss = 7.7671e-03, PNorm = 162.1533, GNorm = 0.2016, lr_0 = 3.7555e-04
Loss = 7.6655e-03, PNorm = 162.1741, GNorm = 0.4932, lr_0 = 3.7530e-04
Loss = 8.5593e-03, PNorm = 162.1942, GNorm = 0.3987, lr_0 = 3.7504e-04
Loss = 8.6392e-03, PNorm = 162.2124, GNorm = 0.2633, lr_0 = 3.7478e-04
Loss = 8.8275e-03, PNorm = 162.2263, GNorm = 0.4138, lr_0 = 3.7453e-04
Loss = 8.8681e-03, PNorm = 162.2499, GNorm = 0.2779, lr_0 = 3.7427e-04
Loss = 8.3009e-03, PNorm = 162.2674, GNorm = 0.2773, lr_0 = 3.7401e-04
Loss = 8.1173e-03, PNorm = 162.2894, GNorm = 0.3875, lr_0 = 3.7376e-04
Loss = 7.0210e-03, PNorm = 162.3107, GNorm = 0.3838, lr_0 = 3.7350e-04
Loss = 8.3790e-03, PNorm = 162.3271, GNorm = 0.5728, lr_0 = 3.7325e-04
Loss = 7.7047e-03, PNorm = 162.3457, GNorm = 0.3779, lr_0 = 3.7299e-04
Loss = 1.0405e-02, PNorm = 162.3609, GNorm = 0.2230, lr_0 = 3.7273e-04
Validation mae = 0.280686
Epoch 14
Loss = 8.9368e-03, PNorm = 162.3738, GNorm = 0.6907, lr_0 = 3.7248e-04
Loss = 7.5447e-03, PNorm = 162.3890, GNorm = 0.1055, lr_0 = 3.7222e-04
Loss = 8.4323e-03, PNorm = 162.4079, GNorm = 0.6640, lr_0 = 3.7197e-04
Loss = 9.9415e-03, PNorm = 162.4191, GNorm = 0.1257, lr_0 = 3.7171e-04
Loss = 9.6179e-03, PNorm = 162.4348, GNorm = 0.1853, lr_0 = 3.7146e-04
Loss = 9.3070e-03, PNorm = 162.4493, GNorm = 0.1291, lr_0 = 3.7120e-04
Loss = 9.1434e-03, PNorm = 162.4672, GNorm = 0.4579, lr_0 = 3.7095e-04
Loss = 7.2203e-03, PNorm = 162.4819, GNorm = 0.2340, lr_0 = 3.7070e-04
Loss = 6.4026e-03, PNorm = 162.4958, GNorm = 0.2655, lr_0 = 3.7044e-04
Loss = 9.3725e-03, PNorm = 162.5122, GNorm = 0.5458, lr_0 = 3.7019e-04
Loss = 7.1422e-03, PNorm = 162.5275, GNorm = 0.2039, lr_0 = 3.6993e-04
Loss = 6.7606e-03, PNorm = 162.5393, GNorm = 0.7836, lr_0 = 3.6968e-04
Loss = 7.6523e-03, PNorm = 162.5547, GNorm = 0.1481, lr_0 = 3.6943e-04
Loss = 8.4752e-03, PNorm = 162.5713, GNorm = 0.1309, lr_0 = 3.6917e-04
Loss = 7.6977e-03, PNorm = 162.5854, GNorm = 0.2586, lr_0 = 3.6892e-04
Loss = 7.4556e-03, PNorm = 162.6032, GNorm = 0.2742, lr_0 = 3.6867e-04
Loss = 7.0423e-03, PNorm = 162.6204, GNorm = 0.3348, lr_0 = 3.6842e-04
Loss = 6.8677e-03, PNorm = 162.6392, GNorm = 0.2959, lr_0 = 3.6816e-04
Loss = 7.6197e-03, PNorm = 162.6578, GNorm = 0.3263, lr_0 = 3.6791e-04
Loss = 7.3968e-03, PNorm = 162.6715, GNorm = 0.4514, lr_0 = 3.6766e-04
Loss = 6.5311e-03, PNorm = 162.6890, GNorm = 0.2674, lr_0 = 3.6741e-04
Loss = 5.8474e-03, PNorm = 162.7012, GNorm = 0.1684, lr_0 = 3.6716e-04
Loss = 5.8739e-03, PNorm = 162.7143, GNorm = 0.3195, lr_0 = 3.6690e-04
Loss = 7.2079e-03, PNorm = 162.7266, GNorm = 0.2001, lr_0 = 3.6665e-04
Loss = 6.1467e-03, PNorm = 162.7416, GNorm = 0.2888, lr_0 = 3.6640e-04
Loss = 7.6389e-03, PNorm = 162.7552, GNorm = 0.2770, lr_0 = 3.6615e-04
Loss = 7.7556e-03, PNorm = 162.7705, GNorm = 0.2270, lr_0 = 3.6590e-04
Loss = 6.1651e-03, PNorm = 162.7812, GNorm = 0.2454, lr_0 = 3.6565e-04
Loss = 8.5163e-03, PNorm = 162.7941, GNorm = 0.2320, lr_0 = 3.6540e-04
Loss = 6.8537e-03, PNorm = 162.8057, GNorm = 0.1248, lr_0 = 3.6515e-04
Loss = 6.9442e-03, PNorm = 162.8216, GNorm = 0.1721, lr_0 = 3.6490e-04
Loss = 5.9911e-03, PNorm = 162.8350, GNorm = 0.2764, lr_0 = 3.6465e-04
Loss = 7.2593e-03, PNorm = 162.8487, GNorm = 0.1317, lr_0 = 3.6440e-04
Loss = 6.7049e-03, PNorm = 162.8593, GNorm = 0.1115, lr_0 = 3.6415e-04
Loss = 6.6356e-03, PNorm = 162.8763, GNorm = 0.2006, lr_0 = 3.6390e-04
Loss = 6.2910e-03, PNorm = 162.8903, GNorm = 0.2437, lr_0 = 3.6365e-04
Loss = 5.9862e-03, PNorm = 162.9064, GNorm = 0.1731, lr_0 = 3.6340e-04
Loss = 5.8338e-03, PNorm = 162.9233, GNorm = 0.1386, lr_0 = 3.6315e-04
Loss = 5.2179e-03, PNorm = 162.9381, GNorm = 0.0985, lr_0 = 3.6290e-04
Loss = 8.4399e-03, PNorm = 162.9521, GNorm = 0.4720, lr_0 = 3.6266e-04
Loss = 6.9045e-03, PNorm = 162.9672, GNorm = 0.1758, lr_0 = 3.6241e-04
Loss = 6.9299e-03, PNorm = 162.9805, GNorm = 0.4505, lr_0 = 3.6216e-04
Loss = 5.4472e-03, PNorm = 162.9947, GNorm = 0.1511, lr_0 = 3.6191e-04
Loss = 6.5426e-03, PNorm = 163.0060, GNorm = 0.2932, lr_0 = 3.6166e-04
Loss = 6.5721e-03, PNorm = 163.0172, GNorm = 0.3153, lr_0 = 3.6141e-04
Loss = 6.1166e-03, PNorm = 163.0298, GNorm = 0.3150, lr_0 = 3.6117e-04
Loss = 5.3802e-03, PNorm = 163.0433, GNorm = 0.1212, lr_0 = 3.6092e-04
Loss = 6.3244e-03, PNorm = 163.0572, GNorm = 0.4526, lr_0 = 3.6067e-04
Loss = 5.8328e-03, PNorm = 163.0724, GNorm = 0.2223, lr_0 = 3.6043e-04
Loss = 8.2587e-03, PNorm = 163.0855, GNorm = 0.1316, lr_0 = 3.6018e-04
Loss = 7.0567e-03, PNorm = 163.1000, GNorm = 0.4127, lr_0 = 3.5993e-04
Loss = 8.7453e-03, PNorm = 163.1113, GNorm = 0.2280, lr_0 = 3.5969e-04
Loss = 8.4470e-03, PNorm = 163.1280, GNorm = 0.2831, lr_0 = 3.5944e-04
Loss = 5.8543e-03, PNorm = 163.1454, GNorm = 0.1483, lr_0 = 3.5919e-04
Loss = 5.8223e-03, PNorm = 163.1619, GNorm = 0.1700, lr_0 = 3.5895e-04
Loss = 9.6035e-03, PNorm = 163.1771, GNorm = 0.3351, lr_0 = 3.5870e-04
Loss = 8.8621e-03, PNorm = 163.1933, GNorm = 0.1270, lr_0 = 3.5845e-04
Loss = 7.0892e-03, PNorm = 163.2055, GNorm = 0.2739, lr_0 = 3.5821e-04
Loss = 9.2299e-03, PNorm = 163.2170, GNorm = 0.2606, lr_0 = 3.5796e-04
Loss = 7.5900e-03, PNorm = 163.2343, GNorm = 0.1458, lr_0 = 3.5772e-04
Loss = 8.1246e-03, PNorm = 163.2518, GNorm = 0.1507, lr_0 = 3.5747e-04
Loss = 7.4510e-03, PNorm = 163.2671, GNorm = 0.3003, lr_0 = 3.5723e-04
Loss = 6.3451e-03, PNorm = 163.2799, GNorm = 0.2840, lr_0 = 3.5698e-04
Loss = 7.5442e-03, PNorm = 163.2954, GNorm = 0.1649, lr_0 = 3.5674e-04
Loss = 6.8454e-03, PNorm = 163.3100, GNorm = 0.1829, lr_0 = 3.5650e-04
Loss = 6.8003e-03, PNorm = 163.3227, GNorm = 0.2599, lr_0 = 3.5625e-04
Loss = 6.2836e-03, PNorm = 163.3376, GNorm = 0.3234, lr_0 = 3.5601e-04
Loss = 7.1211e-03, PNorm = 163.3511, GNorm = 0.1311, lr_0 = 3.5576e-04
Loss = 7.2554e-03, PNorm = 163.3655, GNorm = 0.2286, lr_0 = 3.5552e-04
Loss = 5.8559e-03, PNorm = 163.3805, GNorm = 0.1356, lr_0 = 3.5528e-04
Loss = 6.8624e-03, PNorm = 163.3987, GNorm = 0.1011, lr_0 = 3.5503e-04
Loss = 7.7609e-03, PNorm = 163.4140, GNorm = 0.2206, lr_0 = 3.5479e-04
Loss = 7.8983e-03, PNorm = 163.4290, GNorm = 0.3135, lr_0 = 3.5455e-04
Loss = 5.8428e-03, PNorm = 163.4448, GNorm = 0.3427, lr_0 = 3.5430e-04
Loss = 6.7065e-03, PNorm = 163.4587, GNorm = 0.3056, lr_0 = 3.5406e-04
Loss = 7.1887e-03, PNorm = 163.4750, GNorm = 0.2360, lr_0 = 3.5382e-04
Loss = 8.7702e-03, PNorm = 163.4937, GNorm = 0.5879, lr_0 = 3.5358e-04
Loss = 8.1048e-03, PNorm = 163.5112, GNorm = 0.2456, lr_0 = 3.5333e-04
Loss = 7.3725e-03, PNorm = 163.5264, GNorm = 0.1788, lr_0 = 3.5309e-04
Loss = 6.3078e-03, PNorm = 163.5427, GNorm = 0.3884, lr_0 = 3.5285e-04
Loss = 5.6094e-03, PNorm = 163.5572, GNorm = 0.0942, lr_0 = 3.5261e-04
Loss = 6.3188e-03, PNorm = 163.5764, GNorm = 0.2010, lr_0 = 3.5237e-04
Loss = 8.0933e-03, PNorm = 163.5969, GNorm = 0.3025, lr_0 = 3.5212e-04
Loss = 6.3047e-03, PNorm = 163.6118, GNorm = 0.1983, lr_0 = 3.5188e-04
Loss = 7.9480e-03, PNorm = 163.6275, GNorm = 0.1792, lr_0 = 3.5164e-04
Loss = 7.3651e-03, PNorm = 163.6442, GNorm = 0.2211, lr_0 = 3.5140e-04
Loss = 6.1610e-03, PNorm = 163.6561, GNorm = 0.1311, lr_0 = 3.5116e-04
Loss = 6.9311e-03, PNorm = 163.6723, GNorm = 0.2751, lr_0 = 3.5092e-04
Loss = 8.0754e-03, PNorm = 163.6870, GNorm = 0.4332, lr_0 = 3.5068e-04
Loss = 7.5518e-03, PNorm = 163.7020, GNorm = 0.1135, lr_0 = 3.5044e-04
Loss = 7.2682e-03, PNorm = 163.7172, GNorm = 0.1446, lr_0 = 3.5020e-04
Loss = 1.0108e-02, PNorm = 163.7345, GNorm = 0.1269, lr_0 = 3.4996e-04
Loss = 7.2199e-03, PNorm = 163.7516, GNorm = 0.1345, lr_0 = 3.4972e-04
Loss = 9.8579e-03, PNorm = 163.7696, GNorm = 0.3230, lr_0 = 3.4948e-04
Loss = 6.5111e-03, PNorm = 163.7855, GNorm = 0.1415, lr_0 = 3.4924e-04
Loss = 7.0059e-03, PNorm = 163.8005, GNorm = 0.1274, lr_0 = 3.4900e-04
Loss = 5.8317e-03, PNorm = 163.8185, GNorm = 0.3467, lr_0 = 3.4876e-04
Loss = 6.2418e-03, PNorm = 163.8327, GNorm = 0.2796, lr_0 = 3.4852e-04
Loss = 6.0028e-03, PNorm = 163.8469, GNorm = 0.1479, lr_0 = 3.4828e-04
Loss = 5.8701e-03, PNorm = 163.8592, GNorm = 0.2817, lr_0 = 3.4805e-04
Loss = 7.4081e-03, PNorm = 163.8724, GNorm = 0.2391, lr_0 = 3.4781e-04
Loss = 6.6366e-03, PNorm = 163.8886, GNorm = 0.2102, lr_0 = 3.4757e-04
Loss = 7.1795e-03, PNorm = 163.9059, GNorm = 0.3166, lr_0 = 3.4733e-04
Loss = 6.0611e-03, PNorm = 163.9232, GNorm = 0.2345, lr_0 = 3.4709e-04
Loss = 6.5610e-03, PNorm = 163.9372, GNorm = 0.0743, lr_0 = 3.4686e-04
Loss = 7.1435e-03, PNorm = 163.9530, GNorm = 0.1900, lr_0 = 3.4662e-04
Loss = 9.4253e-03, PNorm = 163.9674, GNorm = 0.2042, lr_0 = 3.4638e-04
Loss = 7.2564e-03, PNorm = 163.9839, GNorm = 0.3731, lr_0 = 3.4614e-04
Loss = 6.9005e-03, PNorm = 164.0002, GNorm = 0.3162, lr_0 = 3.4591e-04
Loss = 7.7196e-03, PNorm = 164.0173, GNorm = 0.1043, lr_0 = 3.4567e-04
Loss = 6.5293e-03, PNorm = 164.0385, GNorm = 0.1985, lr_0 = 3.4543e-04
Loss = 6.8573e-03, PNorm = 164.0533, GNorm = 0.4707, lr_0 = 3.4520e-04
Loss = 6.5019e-03, PNorm = 164.0687, GNorm = 0.2451, lr_0 = 3.4496e-04
Loss = 6.9989e-03, PNorm = 164.0862, GNorm = 0.3134, lr_0 = 3.4472e-04
Loss = 7.6771e-03, PNorm = 164.1029, GNorm = 0.2825, lr_0 = 3.4449e-04
Loss = 8.2693e-03, PNorm = 164.1208, GNorm = 0.5462, lr_0 = 3.4425e-04
Loss = 6.7612e-03, PNorm = 164.1359, GNorm = 0.4999, lr_0 = 3.4402e-04
Loss = 9.1241e-03, PNorm = 164.1514, GNorm = 0.5021, lr_0 = 3.4378e-04
Loss = 8.3285e-03, PNorm = 164.1649, GNorm = 0.2406, lr_0 = 3.4354e-04
Loss = 8.5762e-03, PNorm = 164.1785, GNorm = 0.2541, lr_0 = 3.4331e-04
Validation mae = 0.279058
Epoch 15
Loss = 6.6971e-03, PNorm = 164.1866, GNorm = 0.2170, lr_0 = 3.4307e-04
Loss = 5.7080e-03, PNorm = 164.1973, GNorm = 0.2579, lr_0 = 3.4284e-04
Loss = 5.9494e-03, PNorm = 164.2080, GNorm = 0.2744, lr_0 = 3.4260e-04
Loss = 7.7009e-03, PNorm = 164.2230, GNorm = 0.0980, lr_0 = 3.4237e-04
Loss = 7.7154e-03, PNorm = 164.2381, GNorm = 0.2827, lr_0 = 3.4213e-04
Loss = 5.5643e-03, PNorm = 164.2518, GNorm = 0.1792, lr_0 = 3.4190e-04
Loss = 6.0244e-03, PNorm = 164.2649, GNorm = 0.1212, lr_0 = 3.4167e-04
Loss = 5.5098e-03, PNorm = 164.2745, GNorm = 0.1277, lr_0 = 3.4143e-04
Loss = 7.0259e-03, PNorm = 164.2824, GNorm = 0.1760, lr_0 = 3.4120e-04
Loss = 6.3321e-03, PNorm = 164.2952, GNorm = 0.1272, lr_0 = 3.4096e-04
Loss = 5.9545e-03, PNorm = 164.3058, GNorm = 0.0786, lr_0 = 3.4073e-04
Loss = 6.5661e-03, PNorm = 164.3138, GNorm = 0.0799, lr_0 = 3.4050e-04
Loss = 7.6584e-03, PNorm = 164.3238, GNorm = 0.1984, lr_0 = 3.4026e-04
Loss = 5.6541e-03, PNorm = 164.3351, GNorm = 0.1806, lr_0 = 3.4003e-04
Loss = 6.1387e-03, PNorm = 164.3457, GNorm = 0.4020, lr_0 = 3.3980e-04
Loss = 6.2057e-03, PNorm = 164.3541, GNorm = 0.1134, lr_0 = 3.3956e-04
Loss = 7.4070e-03, PNorm = 164.3638, GNorm = 0.1962, lr_0 = 3.3933e-04
Loss = 6.3793e-03, PNorm = 164.3760, GNorm = 0.6452, lr_0 = 3.3910e-04
Loss = 6.6831e-03, PNorm = 164.3876, GNorm = 0.2134, lr_0 = 3.3887e-04
Loss = 7.8182e-03, PNorm = 164.3973, GNorm = 0.1799, lr_0 = 3.3864e-04
Loss = 5.7367e-03, PNorm = 164.4123, GNorm = 0.2664, lr_0 = 3.3840e-04
Loss = 5.8540e-03, PNorm = 164.4277, GNorm = 0.1281, lr_0 = 3.3817e-04
Loss = 7.0432e-03, PNorm = 164.4446, GNorm = 0.4381, lr_0 = 3.3794e-04
Loss = 5.2769e-03, PNorm = 164.4576, GNorm = 0.3184, lr_0 = 3.3771e-04
Loss = 7.0921e-03, PNorm = 164.4711, GNorm = 0.1534, lr_0 = 3.3748e-04
Loss = 6.1727e-03, PNorm = 164.4840, GNorm = 0.0744, lr_0 = 3.3725e-04
Loss = 5.6142e-03, PNorm = 164.4948, GNorm = 0.2482, lr_0 = 3.3701e-04
Loss = 5.8819e-03, PNorm = 164.5057, GNorm = 0.1602, lr_0 = 3.3678e-04
Loss = 9.6668e-03, PNorm = 164.5135, GNorm = 0.2547, lr_0 = 3.3655e-04
Loss = 6.1251e-03, PNorm = 164.5240, GNorm = 0.4194, lr_0 = 3.3632e-04
Loss = 5.9499e-03, PNorm = 164.5345, GNorm = 0.2327, lr_0 = 3.3609e-04
Loss = 5.3536e-03, PNorm = 164.5466, GNorm = 0.1708, lr_0 = 3.3586e-04
Loss = 5.8573e-03, PNorm = 164.5538, GNorm = 0.3572, lr_0 = 3.3563e-04
Loss = 5.6879e-03, PNorm = 164.5636, GNorm = 0.3452, lr_0 = 3.3540e-04
Loss = 6.2652e-03, PNorm = 164.5746, GNorm = 0.1622, lr_0 = 3.3517e-04
Loss = 7.3142e-03, PNorm = 164.5887, GNorm = 0.4451, lr_0 = 3.3494e-04
Loss = 6.5737e-03, PNorm = 164.6063, GNorm = 0.2664, lr_0 = 3.3471e-04
Loss = 6.5909e-03, PNorm = 164.6179, GNorm = 0.1709, lr_0 = 3.3448e-04
Loss = 5.9246e-03, PNorm = 164.6311, GNorm = 0.2979, lr_0 = 3.3425e-04
Loss = 5.4289e-03, PNorm = 164.6413, GNorm = 0.1395, lr_0 = 3.3403e-04
Loss = 7.1893e-03, PNorm = 164.6542, GNorm = 0.3707, lr_0 = 3.3380e-04
Loss = 8.2464e-03, PNorm = 164.6666, GNorm = 0.1738, lr_0 = 3.3357e-04
Loss = 7.2553e-03, PNorm = 164.6794, GNorm = 0.3156, lr_0 = 3.3334e-04
Loss = 6.8097e-03, PNorm = 164.6950, GNorm = 0.1187, lr_0 = 3.3311e-04
Loss = 6.2784e-03, PNorm = 164.7094, GNorm = 0.0804, lr_0 = 3.3288e-04
Loss = 5.2263e-03, PNorm = 164.7236, GNorm = 0.2767, lr_0 = 3.3265e-04
Loss = 6.0586e-03, PNorm = 164.7356, GNorm = 0.3635, lr_0 = 3.3243e-04
Loss = 8.5209e-03, PNorm = 164.7508, GNorm = 0.1533, lr_0 = 3.3220e-04
Loss = 6.1361e-03, PNorm = 164.7626, GNorm = 0.2514, lr_0 = 3.3197e-04
Loss = 6.2724e-03, PNorm = 164.7754, GNorm = 0.1849, lr_0 = 3.3174e-04
Loss = 5.7908e-03, PNorm = 164.7893, GNorm = 0.1767, lr_0 = 3.3152e-04
Loss = 8.2077e-03, PNorm = 164.8046, GNorm = 0.3496, lr_0 = 3.3129e-04
Loss = 5.0130e-03, PNorm = 164.8190, GNorm = 0.1164, lr_0 = 3.3106e-04
Loss = 6.7501e-03, PNorm = 164.8312, GNorm = 0.0998, lr_0 = 3.3084e-04
Loss = 6.3787e-03, PNorm = 164.8465, GNorm = 0.2468, lr_0 = 3.3061e-04
Loss = 5.5353e-03, PNorm = 164.8556, GNorm = 0.3148, lr_0 = 3.3038e-04
Loss = 5.2593e-03, PNorm = 164.8691, GNorm = 0.1466, lr_0 = 3.3016e-04
Loss = 8.0370e-03, PNorm = 164.8799, GNorm = 0.3124, lr_0 = 3.2993e-04
Loss = 5.7578e-03, PNorm = 164.8962, GNorm = 0.1553, lr_0 = 3.2970e-04
Loss = 4.8929e-03, PNorm = 164.9108, GNorm = 0.2067, lr_0 = 3.2948e-04
Loss = 7.1628e-03, PNorm = 164.9265, GNorm = 0.2616, lr_0 = 3.2925e-04
Loss = 5.8186e-03, PNorm = 164.9393, GNorm = 0.3216, lr_0 = 3.2903e-04
Loss = 4.7403e-03, PNorm = 164.9537, GNorm = 0.0851, lr_0 = 3.2880e-04
Loss = 5.7568e-03, PNorm = 164.9630, GNorm = 0.3739, lr_0 = 3.2858e-04
Loss = 6.2317e-03, PNorm = 164.9737, GNorm = 0.2997, lr_0 = 3.2835e-04
Loss = 6.9515e-03, PNorm = 164.9885, GNorm = 0.1524, lr_0 = 3.2813e-04
Loss = 4.8453e-03, PNorm = 165.0019, GNorm = 0.1111, lr_0 = 3.2790e-04
Loss = 5.7690e-03, PNorm = 165.0133, GNorm = 0.2096, lr_0 = 3.2768e-04
Loss = 7.3203e-03, PNorm = 165.0241, GNorm = 0.5155, lr_0 = 3.2745e-04
Loss = 5.3741e-03, PNorm = 165.0327, GNorm = 0.2390, lr_0 = 3.2723e-04
Loss = 5.2426e-03, PNorm = 165.0456, GNorm = 0.3470, lr_0 = 3.2700e-04
Loss = 6.8906e-03, PNorm = 165.0604, GNorm = 0.1788, lr_0 = 3.2678e-04
Loss = 7.2388e-03, PNorm = 165.0747, GNorm = 0.3826, lr_0 = 3.2656e-04
Loss = 6.0287e-03, PNorm = 165.0879, GNorm = 0.1871, lr_0 = 3.2633e-04
Loss = 5.9792e-03, PNorm = 165.1020, GNorm = 0.3094, lr_0 = 3.2611e-04
Loss = 5.9114e-03, PNorm = 165.1161, GNorm = 0.2078, lr_0 = 3.2589e-04
Loss = 6.2301e-03, PNorm = 165.1289, GNorm = 0.3167, lr_0 = 3.2566e-04
Loss = 6.3559e-03, PNorm = 165.1397, GNorm = 0.1879, lr_0 = 3.2544e-04
Loss = 7.1511e-03, PNorm = 165.1526, GNorm = 0.1352, lr_0 = 3.2522e-04
Loss = 6.6461e-03, PNorm = 165.1668, GNorm = 0.2253, lr_0 = 3.2499e-04
Loss = 5.7686e-03, PNorm = 165.1804, GNorm = 0.6624, lr_0 = 3.2477e-04
Loss = 4.7342e-03, PNorm = 165.1950, GNorm = 0.1102, lr_0 = 3.2455e-04
Loss = 4.7358e-03, PNorm = 165.2082, GNorm = 0.1462, lr_0 = 3.2433e-04
Loss = 5.7305e-03, PNorm = 165.2201, GNorm = 0.1637, lr_0 = 3.2410e-04
Loss = 5.4718e-03, PNorm = 165.2360, GNorm = 0.1142, lr_0 = 3.2388e-04
Loss = 5.2806e-03, PNorm = 165.2485, GNorm = 0.1622, lr_0 = 3.2366e-04
Loss = 5.6778e-03, PNorm = 165.2664, GNorm = 0.1454, lr_0 = 3.2344e-04
Loss = 5.9645e-03, PNorm = 165.2786, GNorm = 0.1588, lr_0 = 3.2322e-04
Loss = 7.2087e-03, PNorm = 165.2895, GNorm = 0.3114, lr_0 = 3.2300e-04
Loss = 9.7203e-03, PNorm = 165.2954, GNorm = 0.1098, lr_0 = 3.2277e-04
Loss = 6.3174e-03, PNorm = 165.3079, GNorm = 0.2027, lr_0 = 3.2255e-04
Loss = 5.8492e-03, PNorm = 165.3194, GNorm = 0.1010, lr_0 = 3.2233e-04
Loss = 6.4710e-03, PNorm = 165.3306, GNorm = 0.3633, lr_0 = 3.2211e-04
Loss = 6.0384e-03, PNorm = 165.3450, GNorm = 0.2003, lr_0 = 3.2189e-04
Loss = 6.2982e-03, PNorm = 165.3597, GNorm = 0.1883, lr_0 = 3.2167e-04
Loss = 8.5641e-03, PNorm = 165.3736, GNorm = 0.2653, lr_0 = 3.2145e-04
Loss = 6.3395e-03, PNorm = 165.3849, GNorm = 0.3735, lr_0 = 3.2123e-04
Loss = 6.0877e-03, PNorm = 165.4006, GNorm = 0.3250, lr_0 = 3.2101e-04
Loss = 6.7595e-03, PNorm = 165.4118, GNorm = 0.2810, lr_0 = 3.2079e-04
Loss = 5.4960e-03, PNorm = 165.4244, GNorm = 0.2132, lr_0 = 3.2057e-04
Loss = 5.3944e-03, PNorm = 165.4397, GNorm = 0.3054, lr_0 = 3.2035e-04
Loss = 7.0186e-03, PNorm = 165.4553, GNorm = 0.4348, lr_0 = 3.2013e-04
Loss = 6.3423e-03, PNorm = 165.4695, GNorm = 0.1801, lr_0 = 3.1991e-04
Loss = 5.7517e-03, PNorm = 165.4838, GNorm = 0.2402, lr_0 = 3.1969e-04
Loss = 4.4352e-03, PNorm = 165.4963, GNorm = 0.4306, lr_0 = 3.1947e-04
Loss = 4.9835e-03, PNorm = 165.5083, GNorm = 0.1545, lr_0 = 3.1925e-04
Loss = 5.3514e-03, PNorm = 165.5207, GNorm = 0.3914, lr_0 = 3.1904e-04
Loss = 5.2710e-03, PNorm = 165.5359, GNorm = 0.5137, lr_0 = 3.1882e-04
Loss = 7.4422e-03, PNorm = 165.5503, GNorm = 0.1810, lr_0 = 3.1860e-04
Loss = 6.3143e-03, PNorm = 165.5677, GNorm = 0.1573, lr_0 = 3.1838e-04
Loss = 5.5998e-03, PNorm = 165.5838, GNorm = 0.2336, lr_0 = 3.1816e-04
Loss = 7.5396e-03, PNorm = 165.5983, GNorm = 0.1306, lr_0 = 3.1794e-04
Loss = 5.6698e-03, PNorm = 165.6114, GNorm = 0.0855, lr_0 = 3.1773e-04
Loss = 8.1051e-03, PNorm = 165.6245, GNorm = 0.1120, lr_0 = 3.1751e-04
Loss = 8.0474e-03, PNorm = 165.6370, GNorm = 0.2280, lr_0 = 3.1729e-04
Loss = 7.6880e-03, PNorm = 165.6503, GNorm = 0.1053, lr_0 = 3.1707e-04
Loss = 5.5067e-03, PNorm = 165.6662, GNorm = 0.1223, lr_0 = 3.1686e-04
Loss = 7.2914e-03, PNorm = 165.6807, GNorm = 0.2368, lr_0 = 3.1664e-04
Loss = 8.7060e-03, PNorm = 165.6989, GNorm = 0.2453, lr_0 = 3.1642e-04
Loss = 8.0051e-03, PNorm = 165.7106, GNorm = 0.3622, lr_0 = 3.1621e-04
Validation mae = 0.279407
Epoch 16
Loss = 5.2076e-03, PNorm = 165.7204, GNorm = 0.1895, lr_0 = 3.1599e-04
Loss = 5.1577e-03, PNorm = 165.7259, GNorm = 0.2033, lr_0 = 3.1577e-04
Loss = 5.7323e-03, PNorm = 165.7360, GNorm = 0.3213, lr_0 = 3.1556e-04
Loss = 4.8308e-03, PNorm = 165.7442, GNorm = 0.2050, lr_0 = 3.1534e-04
Loss = 5.8051e-03, PNorm = 165.7534, GNorm = 0.3655, lr_0 = 3.1512e-04
Loss = 5.1614e-03, PNorm = 165.7603, GNorm = 0.2185, lr_0 = 3.1491e-04
Loss = 4.7844e-03, PNorm = 165.7705, GNorm = 0.3344, lr_0 = 3.1469e-04
Loss = 5.4033e-03, PNorm = 165.7791, GNorm = 0.1780, lr_0 = 3.1448e-04
Loss = 5.5899e-03, PNorm = 165.7877, GNorm = 0.2879, lr_0 = 3.1426e-04
Loss = 6.7599e-03, PNorm = 165.7983, GNorm = 0.1968, lr_0 = 3.1405e-04
Loss = 5.0084e-03, PNorm = 165.8081, GNorm = 0.3010, lr_0 = 3.1383e-04
Loss = 6.1262e-03, PNorm = 165.8180, GNorm = 0.2546, lr_0 = 3.1362e-04
Loss = 7.0257e-03, PNorm = 165.8277, GNorm = 0.1771, lr_0 = 3.1340e-04
Loss = 5.0416e-03, PNorm = 165.8378, GNorm = 0.2744, lr_0 = 3.1319e-04
Loss = 5.4492e-03, PNorm = 165.8469, GNorm = 0.1991, lr_0 = 3.1297e-04
Loss = 5.2590e-03, PNorm = 165.8600, GNorm = 0.3435, lr_0 = 3.1276e-04
Loss = 4.2986e-03, PNorm = 165.8720, GNorm = 0.3647, lr_0 = 3.1254e-04
Loss = 5.4359e-03, PNorm = 165.8817, GNorm = 0.1375, lr_0 = 3.1233e-04
Loss = 3.9212e-03, PNorm = 165.8906, GNorm = 0.2631, lr_0 = 3.1212e-04
Loss = 6.6001e-03, PNorm = 165.9006, GNorm = 0.2476, lr_0 = 3.1190e-04
Loss = 7.7846e-03, PNorm = 165.9113, GNorm = 0.1974, lr_0 = 3.1169e-04
Loss = 5.5742e-03, PNorm = 165.9205, GNorm = 0.2342, lr_0 = 3.1147e-04
Loss = 4.9090e-03, PNorm = 165.9317, GNorm = 0.1898, lr_0 = 3.1126e-04
Loss = 5.4164e-03, PNorm = 165.9418, GNorm = 0.2555, lr_0 = 3.1105e-04
Loss = 4.5254e-03, PNorm = 165.9511, GNorm = 0.3129, lr_0 = 3.1083e-04
Loss = 5.0988e-03, PNorm = 165.9603, GNorm = 0.1519, lr_0 = 3.1062e-04
Loss = 4.3762e-03, PNorm = 165.9661, GNorm = 0.4339, lr_0 = 3.1041e-04
Loss = 4.6566e-03, PNorm = 165.9755, GNorm = 0.1633, lr_0 = 3.1020e-04
Loss = 5.8018e-03, PNorm = 165.9874, GNorm = 0.0712, lr_0 = 3.0998e-04
Loss = 4.4863e-03, PNorm = 165.9981, GNorm = 0.2021, lr_0 = 3.0977e-04
Loss = 5.6154e-03, PNorm = 166.0059, GNorm = 0.3890, lr_0 = 3.0956e-04
Loss = 5.2701e-03, PNorm = 166.0142, GNorm = 0.3664, lr_0 = 3.0935e-04
Loss = 6.8923e-03, PNorm = 166.0241, GNorm = 0.0761, lr_0 = 3.0914e-04
Loss = 4.1812e-03, PNorm = 166.0350, GNorm = 0.1483, lr_0 = 3.0892e-04
Loss = 5.6450e-03, PNorm = 166.0456, GNorm = 0.2323, lr_0 = 3.0871e-04
Loss = 5.2924e-03, PNorm = 166.0573, GNorm = 0.1276, lr_0 = 3.0850e-04
Loss = 5.5455e-03, PNorm = 166.0706, GNorm = 0.1678, lr_0 = 3.0829e-04
Loss = 6.2667e-03, PNorm = 166.0778, GNorm = 0.1968, lr_0 = 3.0808e-04
Loss = 4.7947e-03, PNorm = 166.0868, GNorm = 0.1805, lr_0 = 3.0787e-04
Loss = 5.2863e-03, PNorm = 166.0926, GNorm = 0.3218, lr_0 = 3.0766e-04
Loss = 4.9101e-03, PNorm = 166.1008, GNorm = 0.2592, lr_0 = 3.0745e-04
Loss = 5.4124e-03, PNorm = 166.1104, GNorm = 0.1735, lr_0 = 3.0723e-04
Loss = 5.3178e-03, PNorm = 166.1223, GNorm = 0.0819, lr_0 = 3.0702e-04
Loss = 5.2223e-03, PNorm = 166.1329, GNorm = 0.4094, lr_0 = 3.0681e-04
Loss = 4.1849e-03, PNorm = 166.1445, GNorm = 0.3287, lr_0 = 3.0660e-04
Loss = 5.4439e-03, PNorm = 166.1568, GNorm = 0.0820, lr_0 = 3.0639e-04
Loss = 6.8839e-03, PNorm = 166.1664, GNorm = 0.1921, lr_0 = 3.0618e-04
Loss = 5.1641e-03, PNorm = 166.1770, GNorm = 0.2494, lr_0 = 3.0597e-04
Loss = 5.9331e-03, PNorm = 166.1877, GNorm = 0.4113, lr_0 = 3.0576e-04
Loss = 5.1176e-03, PNorm = 166.2000, GNorm = 0.0929, lr_0 = 3.0555e-04
Loss = 8.5950e-03, PNorm = 166.2127, GNorm = 0.0708, lr_0 = 3.0535e-04
Loss = 7.0174e-03, PNorm = 166.2257, GNorm = 0.1604, lr_0 = 3.0514e-04
Loss = 4.7039e-03, PNorm = 166.2378, GNorm = 0.2265, lr_0 = 3.0493e-04
Loss = 4.8658e-03, PNorm = 166.2474, GNorm = 0.2693, lr_0 = 3.0472e-04
Loss = 4.4009e-03, PNorm = 166.2577, GNorm = 0.2693, lr_0 = 3.0451e-04
Loss = 6.6250e-03, PNorm = 166.2677, GNorm = 0.1799, lr_0 = 3.0430e-04
Loss = 4.6301e-03, PNorm = 166.2775, GNorm = 0.3371, lr_0 = 3.0409e-04
Loss = 5.0489e-03, PNorm = 166.2845, GNorm = 0.2951, lr_0 = 3.0388e-04
Loss = 6.4799e-03, PNorm = 166.2954, GNorm = 0.1287, lr_0 = 3.0368e-04
Loss = 5.7872e-03, PNorm = 166.3092, GNorm = 0.2623, lr_0 = 3.0347e-04
Loss = 4.0254e-03, PNorm = 166.3223, GNorm = 0.0953, lr_0 = 3.0326e-04
Loss = 5.0638e-03, PNorm = 166.3340, GNorm = 0.1286, lr_0 = 3.0305e-04
Loss = 4.5288e-03, PNorm = 166.3441, GNorm = 0.0910, lr_0 = 3.0284e-04
Loss = 4.4498e-03, PNorm = 166.3521, GNorm = 0.2447, lr_0 = 3.0264e-04
Loss = 4.7724e-03, PNorm = 166.3622, GNorm = 0.1742, lr_0 = 3.0243e-04
Loss = 4.5062e-03, PNorm = 166.3748, GNorm = 0.2358, lr_0 = 3.0222e-04
Loss = 4.0464e-03, PNorm = 166.3846, GNorm = 0.2246, lr_0 = 3.0202e-04
Loss = 4.2837e-03, PNorm = 166.3936, GNorm = 0.1935, lr_0 = 3.0181e-04
Loss = 6.2227e-03, PNorm = 166.4041, GNorm = 0.1997, lr_0 = 3.0160e-04
Loss = 4.7396e-03, PNorm = 166.4143, GNorm = 0.1033, lr_0 = 3.0140e-04
Loss = 7.2293e-03, PNorm = 166.4202, GNorm = 0.3294, lr_0 = 3.0119e-04
Loss = 4.3749e-03, PNorm = 166.4299, GNorm = 0.2791, lr_0 = 3.0098e-04
Loss = 5.7417e-03, PNorm = 166.4431, GNorm = 0.1580, lr_0 = 3.0078e-04
Loss = 4.1766e-03, PNorm = 166.4573, GNorm = 0.2415, lr_0 = 3.0057e-04
Loss = 4.6675e-03, PNorm = 166.4660, GNorm = 0.1299, lr_0 = 3.0036e-04
Loss = 6.0890e-03, PNorm = 166.4727, GNorm = 0.2629, lr_0 = 3.0016e-04
Loss = 5.4558e-03, PNorm = 166.4830, GNorm = 0.3318, lr_0 = 2.9995e-04
Loss = 4.8932e-03, PNorm = 166.4939, GNorm = 0.1914, lr_0 = 2.9975e-04
Loss = 4.3008e-03, PNorm = 166.5046, GNorm = 0.0845, lr_0 = 2.9954e-04
Loss = 6.9585e-03, PNorm = 166.5135, GNorm = 0.2234, lr_0 = 2.9934e-04
Loss = 4.9653e-03, PNorm = 166.5267, GNorm = 0.2233, lr_0 = 2.9913e-04
Loss = 4.6155e-03, PNorm = 166.5400, GNorm = 0.1805, lr_0 = 2.9893e-04
Loss = 4.1561e-03, PNorm = 166.5534, GNorm = 0.5276, lr_0 = 2.9872e-04
Loss = 3.9551e-03, PNorm = 166.5636, GNorm = 0.1445, lr_0 = 2.9852e-04
Loss = 4.7446e-03, PNorm = 166.5735, GNorm = 0.2216, lr_0 = 2.9831e-04
Loss = 4.9192e-03, PNorm = 166.5830, GNorm = 0.1995, lr_0 = 2.9811e-04
Loss = 5.2142e-03, PNorm = 166.5949, GNorm = 0.3829, lr_0 = 2.9790e-04
Loss = 3.8882e-03, PNorm = 166.6080, GNorm = 0.3482, lr_0 = 2.9770e-04
Loss = 6.0796e-03, PNorm = 166.6178, GNorm = 0.1180, lr_0 = 2.9750e-04
Loss = 4.6097e-03, PNorm = 166.6258, GNorm = 0.2145, lr_0 = 2.9729e-04
Loss = 4.3280e-03, PNorm = 166.6309, GNorm = 0.1997, lr_0 = 2.9709e-04
Loss = 5.1243e-03, PNorm = 166.6444, GNorm = 0.1509, lr_0 = 2.9689e-04
Loss = 4.6339e-03, PNorm = 166.6585, GNorm = 0.0891, lr_0 = 2.9668e-04
Loss = 3.8442e-03, PNorm = 166.6694, GNorm = 0.1186, lr_0 = 2.9648e-04
Loss = 8.3925e-03, PNorm = 166.6813, GNorm = 0.2304, lr_0 = 2.9628e-04
Loss = 4.2949e-03, PNorm = 166.6945, GNorm = 0.3038, lr_0 = 2.9607e-04
Loss = 1.0028e-02, PNorm = 166.7037, GNorm = 0.8086, lr_0 = 2.9587e-04
Loss = 4.8317e-03, PNorm = 166.7172, GNorm = 0.1487, lr_0 = 2.9567e-04
Loss = 4.3784e-03, PNorm = 166.7307, GNorm = 0.2476, lr_0 = 2.9546e-04
Loss = 5.1979e-03, PNorm = 166.7441, GNorm = 0.1966, lr_0 = 2.9526e-04
Loss = 5.2959e-03, PNorm = 166.7554, GNorm = 0.1881, lr_0 = 2.9506e-04
Loss = 5.2377e-03, PNorm = 166.7640, GNorm = 0.2280, lr_0 = 2.9486e-04
Loss = 5.2872e-03, PNorm = 166.7727, GNorm = 0.5614, lr_0 = 2.9466e-04
Loss = 5.0809e-03, PNorm = 166.7842, GNorm = 0.4564, lr_0 = 2.9445e-04
Loss = 5.5211e-03, PNorm = 166.7989, GNorm = 0.1915, lr_0 = 2.9425e-04
Loss = 5.5576e-03, PNorm = 166.8111, GNorm = 0.1473, lr_0 = 2.9405e-04
Loss = 4.3488e-03, PNorm = 166.8240, GNorm = 0.2337, lr_0 = 2.9385e-04
Loss = 5.2838e-03, PNorm = 166.8363, GNorm = 0.2184, lr_0 = 2.9365e-04
Loss = 5.4613e-03, PNorm = 166.8484, GNorm = 0.3311, lr_0 = 2.9345e-04
Loss = 4.5202e-03, PNorm = 166.8588, GNorm = 0.2096, lr_0 = 2.9325e-04
Loss = 5.3259e-03, PNorm = 166.8689, GNorm = 0.2888, lr_0 = 2.9305e-04
Loss = 5.4859e-03, PNorm = 166.8814, GNorm = 0.1969, lr_0 = 2.9284e-04
Loss = 7.0121e-03, PNorm = 166.8911, GNorm = 0.2182, lr_0 = 2.9264e-04
Loss = 5.4324e-03, PNorm = 166.9013, GNorm = 0.5481, lr_0 = 2.9244e-04
Loss = 7.0655e-03, PNorm = 166.9115, GNorm = 0.2367, lr_0 = 2.9224e-04
Loss = 9.3341e-03, PNorm = 166.9249, GNorm = 0.5675, lr_0 = 2.9204e-04
Loss = 5.5528e-03, PNorm = 166.9364, GNorm = 0.3427, lr_0 = 2.9184e-04
Loss = 4.7204e-03, PNorm = 166.9470, GNorm = 0.1895, lr_0 = 2.9164e-04
Loss = 4.5525e-03, PNorm = 166.9553, GNorm = 0.2223, lr_0 = 2.9144e-04
Loss = 5.6687e-03, PNorm = 166.9646, GNorm = 0.2895, lr_0 = 2.9124e-04
Validation mae = 0.278666
Epoch 17
Loss = 6.1026e-03, PNorm = 166.9709, GNorm = 0.5487, lr_0 = 2.9104e-04
Loss = 4.5142e-03, PNorm = 166.9793, GNorm = 0.0742, lr_0 = 2.9084e-04
Loss = 6.4335e-03, PNorm = 166.9906, GNorm = 0.1702, lr_0 = 2.9065e-04
Loss = 3.9334e-03, PNorm = 167.0002, GNorm = 0.3392, lr_0 = 2.9045e-04
Loss = 5.0952e-03, PNorm = 167.0083, GNorm = 0.1384, lr_0 = 2.9025e-04
Loss = 5.5872e-03, PNorm = 167.0155, GNorm = 0.2748, lr_0 = 2.9005e-04
Loss = 3.4732e-03, PNorm = 167.0214, GNorm = 0.1255, lr_0 = 2.8985e-04
Loss = 3.8351e-03, PNorm = 167.0275, GNorm = 0.1782, lr_0 = 2.8965e-04
Loss = 4.7906e-03, PNorm = 167.0370, GNorm = 0.2013, lr_0 = 2.8945e-04
Loss = 3.8321e-03, PNorm = 167.0443, GNorm = 0.1232, lr_0 = 2.8925e-04
Loss = 4.7756e-03, PNorm = 167.0520, GNorm = 0.4499, lr_0 = 2.8906e-04
Loss = 4.9634e-03, PNorm = 167.0597, GNorm = 0.1775, lr_0 = 2.8886e-04
Loss = 3.8232e-03, PNorm = 167.0648, GNorm = 0.0643, lr_0 = 2.8866e-04
Loss = 4.4798e-03, PNorm = 167.0730, GNorm = 0.0820, lr_0 = 2.8846e-04
Loss = 3.6105e-03, PNorm = 167.0824, GNorm = 0.2296, lr_0 = 2.8826e-04
Loss = 3.8381e-03, PNorm = 167.0908, GNorm = 0.2532, lr_0 = 2.8807e-04
Loss = 4.0284e-03, PNorm = 167.1001, GNorm = 0.1302, lr_0 = 2.8787e-04
Loss = 3.9848e-03, PNorm = 167.1064, GNorm = 0.2760, lr_0 = 2.8767e-04
Loss = 3.3934e-03, PNorm = 167.1138, GNorm = 0.1534, lr_0 = 2.8748e-04
Loss = 4.1883e-03, PNorm = 167.1218, GNorm = 0.0733, lr_0 = 2.8728e-04
Loss = 4.4875e-03, PNorm = 167.1289, GNorm = 0.2627, lr_0 = 2.8708e-04
Loss = 3.6032e-03, PNorm = 167.1384, GNorm = 0.1137, lr_0 = 2.8689e-04
Loss = 3.9406e-03, PNorm = 167.1465, GNorm = 0.2994, lr_0 = 2.8669e-04
Loss = 3.1763e-03, PNorm = 167.1546, GNorm = 0.2466, lr_0 = 2.8649e-04
Loss = 4.2381e-03, PNorm = 167.1596, GNorm = 0.2011, lr_0 = 2.8630e-04
Loss = 3.9381e-03, PNorm = 167.1665, GNorm = 0.1751, lr_0 = 2.8610e-04
Loss = 5.4315e-03, PNorm = 167.1751, GNorm = 0.4135, lr_0 = 2.8590e-04
Loss = 5.8823e-03, PNorm = 167.1857, GNorm = 0.0732, lr_0 = 2.8571e-04
Loss = 4.4082e-03, PNorm = 167.1947, GNorm = 0.3390, lr_0 = 2.8551e-04
Loss = 3.3753e-03, PNorm = 167.2029, GNorm = 0.2095, lr_0 = 2.8532e-04
Loss = 7.1233e-03, PNorm = 167.2120, GNorm = 0.8920, lr_0 = 2.8512e-04
Loss = 3.9294e-03, PNorm = 167.2194, GNorm = 0.3576, lr_0 = 2.8493e-04
Loss = 5.0773e-03, PNorm = 167.2276, GNorm = 0.2079, lr_0 = 2.8473e-04
Loss = 4.4949e-03, PNorm = 167.2349, GNorm = 0.1244, lr_0 = 2.8454e-04
Loss = 3.6418e-03, PNorm = 167.2434, GNorm = 0.0921, lr_0 = 2.8434e-04
Loss = 3.5368e-03, PNorm = 167.2518, GNorm = 0.1806, lr_0 = 2.8415e-04
Loss = 3.8924e-03, PNorm = 167.2606, GNorm = 0.1853, lr_0 = 2.8395e-04
Loss = 3.8474e-03, PNorm = 167.2681, GNorm = 0.2165, lr_0 = 2.8376e-04
Loss = 3.9501e-03, PNorm = 167.2759, GNorm = 0.1984, lr_0 = 2.8356e-04
Loss = 3.5732e-03, PNorm = 167.2828, GNorm = 0.1083, lr_0 = 2.8337e-04
Loss = 5.5735e-03, PNorm = 167.2906, GNorm = 0.3423, lr_0 = 2.8317e-04
Loss = 5.0760e-03, PNorm = 167.2980, GNorm = 0.1872, lr_0 = 2.8298e-04
Loss = 4.7229e-03, PNorm = 167.3054, GNorm = 0.2138, lr_0 = 2.8279e-04
Loss = 6.3930e-03, PNorm = 167.3145, GNorm = 0.1721, lr_0 = 2.8259e-04
Loss = 4.4815e-03, PNorm = 167.3245, GNorm = 0.1414, lr_0 = 2.8240e-04
Loss = 4.7178e-03, PNorm = 167.3353, GNorm = 0.1904, lr_0 = 2.8221e-04
Loss = 3.5638e-03, PNorm = 167.3460, GNorm = 0.1710, lr_0 = 2.8201e-04
Loss = 4.3547e-03, PNorm = 167.3554, GNorm = 0.1129, lr_0 = 2.8182e-04
Loss = 4.7524e-03, PNorm = 167.3625, GNorm = 0.1772, lr_0 = 2.8163e-04
Loss = 4.1114e-03, PNorm = 167.3708, GNorm = 0.1428, lr_0 = 2.8143e-04
Loss = 4.8275e-03, PNorm = 167.3752, GNorm = 0.2855, lr_0 = 2.8124e-04
Loss = 4.2421e-03, PNorm = 167.3833, GNorm = 0.2559, lr_0 = 2.8105e-04
Loss = 5.8675e-03, PNorm = 167.3917, GNorm = 0.5386, lr_0 = 2.8085e-04
Loss = 7.2038e-03, PNorm = 167.4015, GNorm = 0.2026, lr_0 = 2.8066e-04
Loss = 5.1606e-03, PNorm = 167.4099, GNorm = 0.2764, lr_0 = 2.8047e-04
Loss = 4.9709e-03, PNorm = 167.4170, GNorm = 0.4017, lr_0 = 2.8028e-04
Loss = 5.0629e-03, PNorm = 167.4274, GNorm = 0.2107, lr_0 = 2.8009e-04
Loss = 4.2979e-03, PNorm = 167.4365, GNorm = 0.2132, lr_0 = 2.7989e-04
Loss = 4.1481e-03, PNorm = 167.4481, GNorm = 0.2659, lr_0 = 2.7970e-04
Loss = 3.6568e-03, PNorm = 167.4577, GNorm = 0.1362, lr_0 = 2.7951e-04
Loss = 3.9344e-03, PNorm = 167.4645, GNorm = 0.2535, lr_0 = 2.7932e-04
Loss = 4.7763e-03, PNorm = 167.4716, GNorm = 0.1863, lr_0 = 2.7913e-04
Loss = 4.8130e-03, PNorm = 167.4770, GNorm = 0.3154, lr_0 = 2.7894e-04
Loss = 4.8943e-03, PNorm = 167.4876, GNorm = 0.1808, lr_0 = 2.7875e-04
Loss = 5.8220e-03, PNorm = 167.4923, GNorm = 0.1327, lr_0 = 2.7855e-04
Loss = 4.5605e-03, PNorm = 167.5013, GNorm = 0.1894, lr_0 = 2.7836e-04
Loss = 4.6646e-03, PNorm = 167.5129, GNorm = 0.0863, lr_0 = 2.7817e-04
Loss = 3.7319e-03, PNorm = 167.5249, GNorm = 0.0540, lr_0 = 2.7798e-04
Loss = 3.9252e-03, PNorm = 167.5348, GNorm = 0.1903, lr_0 = 2.7779e-04
Loss = 6.2237e-03, PNorm = 167.5448, GNorm = 0.0758, lr_0 = 2.7760e-04
Loss = 3.4940e-03, PNorm = 167.5495, GNorm = 0.1500, lr_0 = 2.7741e-04
Loss = 4.8946e-03, PNorm = 167.5560, GNorm = 0.1443, lr_0 = 2.7722e-04
Loss = 3.9032e-03, PNorm = 167.5662, GNorm = 0.2746, lr_0 = 2.7703e-04
Loss = 5.9723e-03, PNorm = 167.5774, GNorm = 0.3960, lr_0 = 2.7684e-04
Loss = 3.2585e-03, PNorm = 167.5878, GNorm = 0.1349, lr_0 = 2.7665e-04
Loss = 6.1605e-03, PNorm = 167.5977, GNorm = 0.2237, lr_0 = 2.7646e-04
Loss = 4.1713e-03, PNorm = 167.6051, GNorm = 0.2120, lr_0 = 2.7627e-04
Loss = 4.3115e-03, PNorm = 167.6130, GNorm = 0.3085, lr_0 = 2.7608e-04
Loss = 4.1734e-03, PNorm = 167.6199, GNorm = 0.1842, lr_0 = 2.7590e-04
Loss = 5.6534e-03, PNorm = 167.6298, GNorm = 0.1528, lr_0 = 2.7571e-04
Loss = 3.9986e-03, PNorm = 167.6400, GNorm = 0.1825, lr_0 = 2.7552e-04
Loss = 6.1221e-03, PNorm = 167.6524, GNorm = 0.3437, lr_0 = 2.7533e-04
Loss = 4.0713e-03, PNorm = 167.6618, GNorm = 0.1718, lr_0 = 2.7514e-04
Loss = 4.9334e-03, PNorm = 167.6702, GNorm = 0.0911, lr_0 = 2.7495e-04
Loss = 5.0926e-03, PNorm = 167.6781, GNorm = 0.1253, lr_0 = 2.7476e-04
Loss = 3.9493e-03, PNorm = 167.6878, GNorm = 0.1477, lr_0 = 2.7457e-04
Loss = 4.1806e-03, PNorm = 167.6967, GNorm = 0.0711, lr_0 = 2.7439e-04
Loss = 3.8074e-03, PNorm = 167.7069, GNorm = 0.2225, lr_0 = 2.7420e-04
Loss = 5.7382e-03, PNorm = 167.7162, GNorm = 0.3310, lr_0 = 2.7401e-04
Loss = 5.3134e-03, PNorm = 167.7259, GNorm = 0.1768, lr_0 = 2.7382e-04
Loss = 5.1935e-03, PNorm = 167.7327, GNorm = 0.0895, lr_0 = 2.7364e-04
Loss = 4.7119e-03, PNorm = 167.7433, GNorm = 0.2180, lr_0 = 2.7345e-04
Loss = 3.8474e-03, PNorm = 167.7518, GNorm = 0.2907, lr_0 = 2.7326e-04
Loss = 4.3510e-03, PNorm = 167.7614, GNorm = 0.2579, lr_0 = 2.7307e-04
Loss = 3.8901e-03, PNorm = 167.7697, GNorm = 0.1884, lr_0 = 2.7289e-04
Loss = 6.5630e-03, PNorm = 167.7810, GNorm = 0.2002, lr_0 = 2.7270e-04
Loss = 4.4285e-03, PNorm = 167.7895, GNorm = 0.1401, lr_0 = 2.7251e-04
Loss = 3.9286e-03, PNorm = 167.7968, GNorm = 0.3150, lr_0 = 2.7233e-04
Loss = 5.0794e-03, PNorm = 167.8048, GNorm = 0.1117, lr_0 = 2.7214e-04
Loss = 3.9218e-03, PNorm = 167.8155, GNorm = 0.0877, lr_0 = 2.7195e-04
Loss = 3.9899e-03, PNorm = 167.8229, GNorm = 0.3979, lr_0 = 2.7177e-04
Loss = 5.1744e-03, PNorm = 167.8309, GNorm = 0.5143, lr_0 = 2.7158e-04
Loss = 4.2851e-03, PNorm = 167.8396, GNorm = 0.2894, lr_0 = 2.7139e-04
Loss = 4.5937e-03, PNorm = 167.8481, GNorm = 0.1073, lr_0 = 2.7121e-04
Loss = 3.7080e-03, PNorm = 167.8572, GNorm = 0.0964, lr_0 = 2.7102e-04
Loss = 5.2744e-03, PNorm = 167.8682, GNorm = 0.1086, lr_0 = 2.7084e-04
Loss = 4.5071e-03, PNorm = 167.8727, GNorm = 0.2130, lr_0 = 2.7065e-04
Loss = 8.2218e-03, PNorm = 167.8783, GNorm = 0.2107, lr_0 = 2.7047e-04
Loss = 4.6778e-03, PNorm = 167.8843, GNorm = 0.1974, lr_0 = 2.7028e-04
Loss = 6.4282e-03, PNorm = 167.8964, GNorm = 0.1555, lr_0 = 2.7010e-04
Loss = 3.8292e-03, PNorm = 167.9094, GNorm = 0.2746, lr_0 = 2.6991e-04
Loss = 4.7896e-03, PNorm = 167.9170, GNorm = 0.2884, lr_0 = 2.6973e-04
Loss = 4.7162e-03, PNorm = 167.9277, GNorm = 0.0874, lr_0 = 2.6954e-04
Loss = 4.0954e-03, PNorm = 167.9387, GNorm = 0.1041, lr_0 = 2.6936e-04
Loss = 3.6886e-03, PNorm = 167.9491, GNorm = 0.0864, lr_0 = 2.6917e-04
Loss = 5.0309e-03, PNorm = 167.9600, GNorm = 0.4127, lr_0 = 2.6899e-04
Loss = 4.5852e-03, PNorm = 167.9713, GNorm = 0.1507, lr_0 = 2.6880e-04
Loss = 3.4627e-03, PNorm = 167.9804, GNorm = 0.3173, lr_0 = 2.6862e-04
Loss = 3.6887e-03, PNorm = 167.9887, GNorm = 0.1341, lr_0 = 2.6844e-04
Loss = 5.7590e-03, PNorm = 167.9936, GNorm = 0.2646, lr_0 = 2.6825e-04
Validation mae = 0.278481
Epoch 18
Loss = 4.6490e-03, PNorm = 168.0015, GNorm = 0.1713, lr_0 = 2.6807e-04
Loss = 4.4611e-03, PNorm = 168.0095, GNorm = 0.1115, lr_0 = 2.6788e-04
Loss = 4.5749e-03, PNorm = 168.0148, GNorm = 0.1567, lr_0 = 2.6770e-04
Loss = 3.3811e-03, PNorm = 168.0231, GNorm = 0.1588, lr_0 = 2.6752e-04
Loss = 3.4494e-03, PNorm = 168.0313, GNorm = 0.1361, lr_0 = 2.6733e-04
Loss = 3.5426e-03, PNorm = 168.0396, GNorm = 0.1549, lr_0 = 2.6715e-04
Loss = 3.3694e-03, PNorm = 168.0479, GNorm = 0.1809, lr_0 = 2.6697e-04
Loss = 4.4556e-03, PNorm = 168.0547, GNorm = 0.2232, lr_0 = 2.6678e-04
Loss = 4.4309e-03, PNorm = 168.0619, GNorm = 0.1599, lr_0 = 2.6660e-04
Loss = 4.0208e-03, PNorm = 168.0684, GNorm = 0.0889, lr_0 = 2.6642e-04
Loss = 3.1845e-03, PNorm = 168.0738, GNorm = 0.1861, lr_0 = 2.6624e-04
Loss = 4.0413e-03, PNorm = 168.0792, GNorm = 0.3511, lr_0 = 2.6605e-04
Loss = 3.8689e-03, PNorm = 168.0854, GNorm = 0.1923, lr_0 = 2.6587e-04
Loss = 5.3691e-03, PNorm = 168.0896, GNorm = 0.1990, lr_0 = 2.6569e-04
Loss = 3.1716e-03, PNorm = 168.0923, GNorm = 0.2479, lr_0 = 2.6551e-04
Loss = 2.7202e-03, PNorm = 168.0980, GNorm = 0.1080, lr_0 = 2.6533e-04
Loss = 4.7069e-03, PNorm = 168.1031, GNorm = 0.1420, lr_0 = 2.6514e-04
Loss = 3.4615e-03, PNorm = 168.1078, GNorm = 0.2247, lr_0 = 2.6496e-04
Loss = 3.3418e-03, PNorm = 168.1145, GNorm = 0.2338, lr_0 = 2.6478e-04
Loss = 4.8447e-03, PNorm = 168.1206, GNorm = 0.2138, lr_0 = 2.6460e-04
Loss = 4.4072e-03, PNorm = 168.1286, GNorm = 0.4333, lr_0 = 2.6442e-04
Loss = 4.2909e-03, PNorm = 168.1370, GNorm = 0.1976, lr_0 = 2.6424e-04
Loss = 4.0278e-03, PNorm = 168.1451, GNorm = 0.3937, lr_0 = 2.6406e-04
Loss = 3.1732e-03, PNorm = 168.1522, GNorm = 0.2089, lr_0 = 2.6388e-04
Loss = 3.4862e-03, PNorm = 168.1602, GNorm = 0.1764, lr_0 = 2.6369e-04
Loss = 3.3933e-03, PNorm = 168.1679, GNorm = 0.1346, lr_0 = 2.6351e-04
Loss = 3.5108e-03, PNorm = 168.1759, GNorm = 0.1377, lr_0 = 2.6333e-04
Loss = 4.3486e-03, PNorm = 168.1809, GNorm = 0.1492, lr_0 = 2.6315e-04
Loss = 3.7462e-03, PNorm = 168.1865, GNorm = 0.2626, lr_0 = 2.6297e-04
Loss = 3.4315e-03, PNorm = 168.1919, GNorm = 0.0881, lr_0 = 2.6279e-04
Loss = 3.6412e-03, PNorm = 168.2008, GNorm = 0.2266, lr_0 = 2.6261e-04
Loss = 4.7857e-03, PNorm = 168.2070, GNorm = 0.2459, lr_0 = 2.6243e-04
Loss = 5.5916e-03, PNorm = 168.2153, GNorm = 0.2143, lr_0 = 2.6225e-04
Loss = 3.3205e-03, PNorm = 168.2209, GNorm = 0.2148, lr_0 = 2.6207e-04
Loss = 3.3295e-03, PNorm = 168.2297, GNorm = 0.1095, lr_0 = 2.6189e-04
Loss = 3.7681e-03, PNorm = 168.2389, GNorm = 0.0798, lr_0 = 2.6171e-04
Loss = 3.3344e-03, PNorm = 168.2475, GNorm = 0.2527, lr_0 = 2.6153e-04
Loss = 3.8586e-03, PNorm = 168.2566, GNorm = 0.2310, lr_0 = 2.6136e-04
Loss = 3.3180e-03, PNorm = 168.2630, GNorm = 0.1400, lr_0 = 2.6118e-04
Loss = 3.1254e-03, PNorm = 168.2725, GNorm = 0.1575, lr_0 = 2.6100e-04
Loss = 3.8129e-03, PNorm = 168.2801, GNorm = 0.3199, lr_0 = 2.6082e-04
Loss = 3.8783e-03, PNorm = 168.2875, GNorm = 0.1686, lr_0 = 2.6064e-04
Loss = 2.9589e-03, PNorm = 168.2962, GNorm = 0.3031, lr_0 = 2.6046e-04
Loss = 4.0353e-03, PNorm = 168.3053, GNorm = 0.1237, lr_0 = 2.6028e-04
Loss = 5.2413e-03, PNorm = 168.3134, GNorm = 0.2961, lr_0 = 2.6011e-04
Loss = 3.0972e-03, PNorm = 168.3201, GNorm = 0.1820, lr_0 = 2.5993e-04
Loss = 3.0136e-03, PNorm = 168.3268, GNorm = 0.0548, lr_0 = 2.5975e-04
Loss = 4.5797e-03, PNorm = 168.3356, GNorm = 0.1242, lr_0 = 2.5957e-04
Loss = 3.2479e-03, PNorm = 168.3436, GNorm = 0.1592, lr_0 = 2.5939e-04
Loss = 5.1779e-03, PNorm = 168.3506, GNorm = 0.4182, lr_0 = 2.5922e-04
Loss = 3.6573e-03, PNorm = 168.3578, GNorm = 0.1177, lr_0 = 2.5904e-04
Loss = 3.7208e-03, PNorm = 168.3654, GNorm = 0.2611, lr_0 = 2.5886e-04
Loss = 4.4885e-03, PNorm = 168.3700, GNorm = 0.1914, lr_0 = 2.5868e-04
Loss = 5.5184e-03, PNorm = 168.3759, GNorm = 0.0755, lr_0 = 2.5851e-04
Loss = 4.8962e-03, PNorm = 168.3834, GNorm = 0.2415, lr_0 = 2.5833e-04
Loss = 4.6373e-03, PNorm = 168.3915, GNorm = 0.1716, lr_0 = 2.5815e-04
Loss = 3.4643e-03, PNorm = 168.4027, GNorm = 0.3439, lr_0 = 2.5797e-04
Loss = 4.2423e-03, PNorm = 168.4104, GNorm = 0.0784, lr_0 = 2.5780e-04
Loss = 3.3125e-03, PNorm = 168.4163, GNorm = 0.3167, lr_0 = 2.5762e-04
Loss = 2.9991e-03, PNorm = 168.4228, GNorm = 0.3226, lr_0 = 2.5745e-04
Loss = 3.2580e-03, PNorm = 168.4305, GNorm = 0.0909, lr_0 = 2.5727e-04
Loss = 3.6823e-03, PNorm = 168.4378, GNorm = 0.3453, lr_0 = 2.5709e-04
Loss = 3.3335e-03, PNorm = 168.4437, GNorm = 0.1558, lr_0 = 2.5692e-04
Loss = 3.3187e-03, PNorm = 168.4506, GNorm = 0.1008, lr_0 = 2.5674e-04
Loss = 3.3354e-03, PNorm = 168.4600, GNorm = 0.1254, lr_0 = 2.5656e-04
Loss = 4.7239e-03, PNorm = 168.4680, GNorm = 0.1876, lr_0 = 2.5639e-04
Loss = 3.3953e-03, PNorm = 168.4759, GNorm = 0.1995, lr_0 = 2.5621e-04
Loss = 6.0121e-03, PNorm = 168.4826, GNorm = 0.3293, lr_0 = 2.5604e-04
Loss = 4.3601e-03, PNorm = 168.4898, GNorm = 0.6636, lr_0 = 2.5586e-04
Loss = 5.5428e-03, PNorm = 168.4989, GNorm = 0.1272, lr_0 = 2.5569e-04
Loss = 3.5517e-03, PNorm = 168.5061, GNorm = 0.1607, lr_0 = 2.5551e-04
Loss = 3.5204e-03, PNorm = 168.5125, GNorm = 0.1140, lr_0 = 2.5534e-04
Loss = 4.5195e-03, PNorm = 168.5213, GNorm = 0.1134, lr_0 = 2.5516e-04
Loss = 6.5550e-03, PNorm = 168.5281, GNorm = 0.0703, lr_0 = 2.5499e-04
Loss = 2.9530e-03, PNorm = 168.5354, GNorm = 0.1039, lr_0 = 2.5481e-04
Loss = 4.8943e-03, PNorm = 168.5444, GNorm = 0.1007, lr_0 = 2.5464e-04
Loss = 3.7252e-03, PNorm = 168.5513, GNorm = 0.1770, lr_0 = 2.5446e-04
Loss = 4.1380e-03, PNorm = 168.5605, GNorm = 0.1929, lr_0 = 2.5429e-04
Loss = 3.0700e-03, PNorm = 168.5676, GNorm = 0.2706, lr_0 = 2.5411e-04
Loss = 3.0575e-03, PNorm = 168.5750, GNorm = 0.1080, lr_0 = 2.5394e-04
Loss = 4.1361e-03, PNorm = 168.5819, GNorm = 0.0774, lr_0 = 2.5377e-04
Loss = 3.1968e-03, PNorm = 168.5894, GNorm = 0.2474, lr_0 = 2.5359e-04
Loss = 3.0051e-03, PNorm = 168.5956, GNorm = 0.1145, lr_0 = 2.5342e-04
Loss = 3.4080e-03, PNorm = 168.6019, GNorm = 0.1384, lr_0 = 2.5325e-04
Loss = 4.3496e-03, PNorm = 168.6109, GNorm = 0.2867, lr_0 = 2.5307e-04
Loss = 4.1368e-03, PNorm = 168.6185, GNorm = 0.4143, lr_0 = 2.5290e-04
Loss = 3.8069e-03, PNorm = 168.6267, GNorm = 0.0644, lr_0 = 2.5273e-04
Loss = 2.9784e-03, PNorm = 168.6337, GNorm = 0.1147, lr_0 = 2.5255e-04
Loss = 3.7204e-03, PNorm = 168.6406, GNorm = 0.1036, lr_0 = 2.5238e-04
Loss = 5.0873e-03, PNorm = 168.6453, GNorm = 0.2580, lr_0 = 2.5221e-04
Loss = 5.0235e-03, PNorm = 168.6511, GNorm = 0.1144, lr_0 = 2.5203e-04
Loss = 4.1183e-03, PNorm = 168.6579, GNorm = 0.0742, lr_0 = 2.5186e-04
Loss = 3.1264e-03, PNorm = 168.6670, GNorm = 0.2074, lr_0 = 2.5169e-04
Loss = 3.1281e-03, PNorm = 168.6743, GNorm = 0.2325, lr_0 = 2.5152e-04
Loss = 3.5558e-03, PNorm = 168.6837, GNorm = 0.1533, lr_0 = 2.5134e-04
Loss = 3.9170e-03, PNorm = 168.6914, GNorm = 0.2461, lr_0 = 2.5117e-04
Loss = 6.0268e-03, PNorm = 168.7023, GNorm = 0.2326, lr_0 = 2.5100e-04
Loss = 3.3195e-03, PNorm = 168.7119, GNorm = 0.0807, lr_0 = 2.5083e-04
Loss = 4.3753e-03, PNorm = 168.7198, GNorm = 0.2285, lr_0 = 2.5066e-04
Loss = 5.3809e-03, PNorm = 168.7249, GNorm = 0.1430, lr_0 = 2.5048e-04
Loss = 4.9472e-03, PNorm = 168.7333, GNorm = 0.0959, lr_0 = 2.5031e-04
Loss = 4.6278e-03, PNorm = 168.7416, GNorm = 0.0881, lr_0 = 2.5014e-04
Loss = 3.0528e-03, PNorm = 168.7496, GNorm = 0.1073, lr_0 = 2.4997e-04
Loss = 4.1538e-03, PNorm = 168.7554, GNorm = 0.3086, lr_0 = 2.4980e-04
Loss = 4.5257e-03, PNorm = 168.7630, GNorm = 0.0659, lr_0 = 2.4963e-04
Loss = 4.1351e-03, PNorm = 168.7713, GNorm = 0.2687, lr_0 = 2.4946e-04
Loss = 3.4823e-03, PNorm = 168.7773, GNorm = 0.1684, lr_0 = 2.4929e-04
Loss = 3.3312e-03, PNorm = 168.7867, GNorm = 0.2568, lr_0 = 2.4911e-04
Loss = 4.9740e-03, PNorm = 168.7953, GNorm = 0.4335, lr_0 = 2.4894e-04
Loss = 5.7244e-03, PNorm = 168.8041, GNorm = 0.5777, lr_0 = 2.4877e-04
Loss = 4.3312e-03, PNorm = 168.8144, GNorm = 0.2491, lr_0 = 2.4860e-04
Loss = 3.6646e-03, PNorm = 168.8195, GNorm = 0.2113, lr_0 = 2.4843e-04
Loss = 4.3105e-03, PNorm = 168.8266, GNorm = 0.1652, lr_0 = 2.4826e-04
Loss = 6.3310e-03, PNorm = 168.8341, GNorm = 0.3881, lr_0 = 2.4809e-04
Loss = 5.2395e-03, PNorm = 168.8429, GNorm = 0.3213, lr_0 = 2.4792e-04
Loss = 3.2852e-03, PNorm = 168.8505, GNorm = 0.1488, lr_0 = 2.4775e-04
Loss = 4.6016e-03, PNorm = 168.8588, GNorm = 0.0824, lr_0 = 2.4758e-04
Loss = 4.5578e-03, PNorm = 168.8669, GNorm = 0.2720, lr_0 = 2.4741e-04
Loss = 4.0412e-03, PNorm = 168.8746, GNorm = 0.0830, lr_0 = 2.4724e-04
Loss = 2.8131e-03, PNorm = 168.8846, GNorm = 0.0583, lr_0 = 2.4707e-04
Validation mae = 0.278583
Epoch 19
Loss = 3.1792e-03, PNorm = 168.8919, GNorm = 0.2026, lr_0 = 2.4690e-04
Loss = 2.5632e-03, PNorm = 168.8974, GNorm = 0.1844, lr_0 = 2.4674e-04
Loss = 2.6887e-03, PNorm = 168.8999, GNorm = 0.1228, lr_0 = 2.4657e-04
Loss = 2.8799e-03, PNorm = 168.9047, GNorm = 0.1164, lr_0 = 2.4640e-04
Loss = 3.4996e-03, PNorm = 168.9082, GNorm = 0.2226, lr_0 = 2.4623e-04
Loss = 4.8580e-03, PNorm = 168.9140, GNorm = 0.1970, lr_0 = 2.4606e-04
Loss = 5.1079e-03, PNorm = 168.9193, GNorm = 0.2344, lr_0 = 2.4589e-04
Loss = 3.2413e-03, PNorm = 168.9278, GNorm = 0.1508, lr_0 = 2.4572e-04
Loss = 3.4879e-03, PNorm = 168.9342, GNorm = 0.1296, lr_0 = 2.4556e-04
Loss = 2.8499e-03, PNorm = 168.9411, GNorm = 0.0984, lr_0 = 2.4539e-04
Loss = 3.5905e-03, PNorm = 168.9497, GNorm = 0.2712, lr_0 = 2.4522e-04
Loss = 3.2469e-03, PNorm = 168.9537, GNorm = 0.0764, lr_0 = 2.4505e-04
Loss = 3.5055e-03, PNorm = 168.9589, GNorm = 0.4980, lr_0 = 2.4488e-04
Loss = 3.5856e-03, PNorm = 168.9638, GNorm = 0.0863, lr_0 = 2.4472e-04
Loss = 5.8549e-03, PNorm = 168.9680, GNorm = 0.4828, lr_0 = 2.4455e-04
Loss = 3.0274e-03, PNorm = 168.9710, GNorm = 0.1567, lr_0 = 2.4438e-04
Loss = 4.1946e-03, PNorm = 168.9787, GNorm = 0.2189, lr_0 = 2.4421e-04
Loss = 3.7727e-03, PNorm = 168.9847, GNorm = 0.2913, lr_0 = 2.4405e-04
Loss = 2.9034e-03, PNorm = 168.9919, GNorm = 0.1234, lr_0 = 2.4388e-04
Loss = 2.8743e-03, PNorm = 168.9986, GNorm = 0.2009, lr_0 = 2.4371e-04
Loss = 3.0449e-03, PNorm = 169.0031, GNorm = 0.0603, lr_0 = 2.4354e-04
Loss = 3.9405e-03, PNorm = 169.0073, GNorm = 0.3909, lr_0 = 2.4338e-04
Loss = 5.8506e-03, PNorm = 169.0124, GNorm = 0.1508, lr_0 = 2.4321e-04
Loss = 2.8745e-03, PNorm = 169.0177, GNorm = 0.0425, lr_0 = 2.4304e-04
Loss = 3.9737e-03, PNorm = 169.0240, GNorm = 0.1921, lr_0 = 2.4288e-04
Loss = 5.6014e-03, PNorm = 169.0333, GNorm = 0.3702, lr_0 = 2.4271e-04
Loss = 2.8188e-03, PNorm = 169.0415, GNorm = 0.0958, lr_0 = 2.4254e-04
Loss = 4.4662e-03, PNorm = 169.0470, GNorm = 0.2114, lr_0 = 2.4238e-04
Loss = 3.9909e-03, PNorm = 169.0518, GNorm = 0.0849, lr_0 = 2.4221e-04
Loss = 2.2178e-03, PNorm = 169.0573, GNorm = 0.1733, lr_0 = 2.4205e-04
Loss = 3.5130e-03, PNorm = 169.0635, GNorm = 0.3713, lr_0 = 2.4188e-04
Loss = 3.8353e-03, PNorm = 169.0695, GNorm = 0.1123, lr_0 = 2.4171e-04
Loss = 3.1479e-03, PNorm = 169.0752, GNorm = 0.2865, lr_0 = 2.4155e-04
Loss = 3.5584e-03, PNorm = 169.0818, GNorm = 0.2950, lr_0 = 2.4138e-04
Loss = 3.5780e-03, PNorm = 169.0898, GNorm = 0.0801, lr_0 = 2.4122e-04
Loss = 3.4372e-03, PNorm = 169.0963, GNorm = 0.1506, lr_0 = 2.4105e-04
Loss = 3.2029e-03, PNorm = 169.1025, GNorm = 0.0471, lr_0 = 2.4089e-04
Loss = 4.5718e-03, PNorm = 169.1094, GNorm = 0.0681, lr_0 = 2.4072e-04
Loss = 2.6949e-03, PNorm = 169.1160, GNorm = 0.2017, lr_0 = 2.4056e-04
Loss = 3.1616e-03, PNorm = 169.1248, GNorm = 0.2131, lr_0 = 2.4039e-04
Loss = 4.9466e-03, PNorm = 169.1292, GNorm = 0.2113, lr_0 = 2.4023e-04
Loss = 3.8840e-03, PNorm = 169.1350, GNorm = 0.1377, lr_0 = 2.4006e-04
Loss = 3.0627e-03, PNorm = 169.1391, GNorm = 0.0513, lr_0 = 2.3990e-04
Loss = 3.2594e-03, PNorm = 169.1460, GNorm = 0.0718, lr_0 = 2.3974e-04
Loss = 4.8338e-03, PNorm = 169.1504, GNorm = 0.1473, lr_0 = 2.3957e-04
Loss = 2.5087e-03, PNorm = 169.1573, GNorm = 0.1224, lr_0 = 2.3941e-04
Loss = 2.9309e-03, PNorm = 169.1628, GNorm = 0.5698, lr_0 = 2.3924e-04
Loss = 3.0358e-03, PNorm = 169.1722, GNorm = 0.2224, lr_0 = 2.3908e-04
Loss = 2.6548e-03, PNorm = 169.1787, GNorm = 0.0620, lr_0 = 2.3892e-04
Loss = 2.6417e-03, PNorm = 169.1846, GNorm = 0.1623, lr_0 = 2.3875e-04
Loss = 4.2860e-03, PNorm = 169.1892, GNorm = 0.1108, lr_0 = 2.3859e-04
Loss = 2.5843e-03, PNorm = 169.1957, GNorm = 0.2832, lr_0 = 2.3842e-04
Loss = 3.1054e-03, PNorm = 169.2002, GNorm = 0.1891, lr_0 = 2.3826e-04
Loss = 2.7346e-03, PNorm = 169.2051, GNorm = 0.1354, lr_0 = 2.3810e-04
Loss = 2.3926e-03, PNorm = 169.2110, GNorm = 0.0820, lr_0 = 2.3794e-04
Loss = 3.3662e-03, PNorm = 169.2160, GNorm = 0.0719, lr_0 = 2.3777e-04
Loss = 2.2753e-03, PNorm = 169.2223, GNorm = 0.1838, lr_0 = 2.3761e-04
Loss = 3.1228e-03, PNorm = 169.2284, GNorm = 0.1349, lr_0 = 2.3745e-04
Loss = 4.6144e-03, PNorm = 169.2338, GNorm = 0.0658, lr_0 = 2.3728e-04
Loss = 3.5299e-03, PNorm = 169.2412, GNorm = 0.2876, lr_0 = 2.3712e-04
Loss = 3.8954e-03, PNorm = 169.2481, GNorm = 0.3719, lr_0 = 2.3696e-04
Loss = 4.4696e-03, PNorm = 169.2544, GNorm = 0.1964, lr_0 = 2.3680e-04
Loss = 4.1540e-03, PNorm = 169.2597, GNorm = 0.2890, lr_0 = 2.3663e-04
Loss = 2.6208e-03, PNorm = 169.2676, GNorm = 0.1464, lr_0 = 2.3647e-04
Loss = 3.6382e-03, PNorm = 169.2743, GNorm = 0.1538, lr_0 = 2.3631e-04
Loss = 3.8800e-03, PNorm = 169.2798, GNorm = 0.0969, lr_0 = 2.3615e-04
Loss = 4.2649e-03, PNorm = 169.2841, GNorm = 0.1587, lr_0 = 2.3599e-04
Loss = 3.2404e-03, PNorm = 169.2892, GNorm = 0.1807, lr_0 = 2.3582e-04
Loss = 3.0881e-03, PNorm = 169.2943, GNorm = 0.1137, lr_0 = 2.3566e-04
Loss = 3.9759e-03, PNorm = 169.3023, GNorm = 0.3864, lr_0 = 2.3550e-04
Loss = 2.7195e-03, PNorm = 169.3070, GNorm = 0.2258, lr_0 = 2.3534e-04
Loss = 2.9021e-03, PNorm = 169.3139, GNorm = 0.2671, lr_0 = 2.3518e-04
Loss = 3.5621e-03, PNorm = 169.3185, GNorm = 0.2034, lr_0 = 2.3502e-04
Loss = 3.5262e-03, PNorm = 169.3243, GNorm = 0.1503, lr_0 = 2.3486e-04
Loss = 3.4907e-03, PNorm = 169.3312, GNorm = 0.2504, lr_0 = 2.3470e-04
Loss = 2.8774e-03, PNorm = 169.3359, GNorm = 0.1348, lr_0 = 2.3454e-04
Loss = 3.0370e-03, PNorm = 169.3386, GNorm = 0.2136, lr_0 = 2.3437e-04
Loss = 3.9963e-03, PNorm = 169.3440, GNorm = 0.2356, lr_0 = 2.3421e-04
Loss = 3.7800e-03, PNorm = 169.3505, GNorm = 0.0652, lr_0 = 2.3405e-04
Loss = 3.1856e-03, PNorm = 169.3578, GNorm = 0.2220, lr_0 = 2.3389e-04
Loss = 3.1807e-03, PNorm = 169.3628, GNorm = 0.3153, lr_0 = 2.3373e-04
Loss = 3.1030e-03, PNorm = 169.3709, GNorm = 0.1273, lr_0 = 2.3357e-04
Loss = 2.9821e-03, PNorm = 169.3797, GNorm = 0.2740, lr_0 = 2.3341e-04
Loss = 2.8133e-03, PNorm = 169.3860, GNorm = 0.1088, lr_0 = 2.3325e-04
Loss = 2.5269e-03, PNorm = 169.3918, GNorm = 0.0660, lr_0 = 2.3309e-04
Loss = 2.7977e-03, PNorm = 169.3966, GNorm = 0.1360, lr_0 = 2.3293e-04
Loss = 2.3739e-03, PNorm = 169.4018, GNorm = 0.2711, lr_0 = 2.3277e-04
Loss = 2.6590e-03, PNorm = 169.4078, GNorm = 0.0704, lr_0 = 2.3261e-04
Loss = 3.8118e-03, PNorm = 169.4134, GNorm = 0.1467, lr_0 = 2.3246e-04
Loss = 2.5773e-03, PNorm = 169.4202, GNorm = 0.0759, lr_0 = 2.3230e-04
Loss = 3.1662e-03, PNorm = 169.4253, GNorm = 0.1708, lr_0 = 2.3214e-04
Loss = 3.2913e-03, PNorm = 169.4355, GNorm = 0.2470, lr_0 = 2.3198e-04
Loss = 3.5696e-03, PNorm = 169.4448, GNorm = 0.4551, lr_0 = 2.3182e-04
Loss = 4.8100e-03, PNorm = 169.4505, GNorm = 0.1411, lr_0 = 2.3166e-04
Loss = 2.7235e-03, PNorm = 169.4556, GNorm = 0.1813, lr_0 = 2.3150e-04
Loss = 2.9182e-03, PNorm = 169.4611, GNorm = 0.1124, lr_0 = 2.3134e-04
Loss = 3.7024e-03, PNorm = 169.4678, GNorm = 0.0917, lr_0 = 2.3118e-04
Loss = 3.4567e-03, PNorm = 169.4749, GNorm = 0.0893, lr_0 = 2.3103e-04
Loss = 2.5130e-03, PNorm = 169.4820, GNorm = 0.0681, lr_0 = 2.3087e-04
Loss = 3.7662e-03, PNorm = 169.4891, GNorm = 0.1166, lr_0 = 2.3071e-04
Loss = 4.5923e-03, PNorm = 169.4941, GNorm = 0.2101, lr_0 = 2.3055e-04
Loss = 3.0384e-03, PNorm = 169.5008, GNorm = 0.3472, lr_0 = 2.3039e-04
Loss = 4.8586e-03, PNorm = 169.5073, GNorm = 0.1963, lr_0 = 2.3024e-04
Loss = 3.7858e-03, PNorm = 169.5150, GNorm = 0.0771, lr_0 = 2.3008e-04
Loss = 4.3711e-03, PNorm = 169.5225, GNorm = 0.1366, lr_0 = 2.2992e-04
Loss = 3.3090e-03, PNorm = 169.5287, GNorm = 0.1006, lr_0 = 2.2976e-04
Loss = 4.3003e-03, PNorm = 169.5356, GNorm = 0.4590, lr_0 = 2.2961e-04
Loss = 2.9382e-03, PNorm = 169.5431, GNorm = 0.1981, lr_0 = 2.2945e-04
Loss = 3.6269e-03, PNorm = 169.5514, GNorm = 0.1895, lr_0 = 2.2929e-04
Loss = 3.1504e-03, PNorm = 169.5595, GNorm = 0.1947, lr_0 = 2.2913e-04
Loss = 6.6257e-03, PNorm = 169.5646, GNorm = 0.2102, lr_0 = 2.2898e-04
Loss = 4.5877e-03, PNorm = 169.5694, GNorm = 0.1405, lr_0 = 2.2882e-04
Loss = 3.0370e-03, PNorm = 169.5755, GNorm = 0.1582, lr_0 = 2.2866e-04
Loss = 2.7119e-03, PNorm = 169.5825, GNorm = 0.3221, lr_0 = 2.2851e-04
Loss = 3.4499e-03, PNorm = 169.5882, GNorm = 0.3578, lr_0 = 2.2835e-04
Loss = 2.9440e-03, PNorm = 169.5961, GNorm = 0.1093, lr_0 = 2.2819e-04
Loss = 3.0335e-03, PNorm = 169.6055, GNorm = 0.0603, lr_0 = 2.2804e-04
Loss = 3.2095e-03, PNorm = 169.6131, GNorm = 0.2222, lr_0 = 2.2788e-04
Loss = 3.6713e-03, PNorm = 169.6173, GNorm = 0.0679, lr_0 = 2.2773e-04
Loss = 6.6078e-03, PNorm = 169.6216, GNorm = 0.1723, lr_0 = 2.2757e-04
Validation mae = 0.278337
Epoch 20
Loss = 3.2133e-03, PNorm = 169.6265, GNorm = 0.1576, lr_0 = 2.2741e-04
Loss = 3.1647e-03, PNorm = 169.6305, GNorm = 0.1677, lr_0 = 2.2726e-04
Loss = 2.6789e-03, PNorm = 169.6342, GNorm = 0.1011, lr_0 = 2.2710e-04
Loss = 2.0648e-03, PNorm = 169.6385, GNorm = 0.1644, lr_0 = 2.2695e-04
Loss = 2.8916e-03, PNorm = 169.6423, GNorm = 0.2475, lr_0 = 2.2679e-04
Loss = 3.2313e-03, PNorm = 169.6467, GNorm = 0.3045, lr_0 = 2.2664e-04
Loss = 5.5287e-03, PNorm = 169.6518, GNorm = 0.1828, lr_0 = 2.2648e-04
Loss = 2.7846e-03, PNorm = 169.6569, GNorm = 0.1206, lr_0 = 2.2632e-04
Loss = 2.7523e-03, PNorm = 169.6618, GNorm = 0.2424, lr_0 = 2.2617e-04
Loss = 3.7119e-03, PNorm = 169.6655, GNorm = 0.2581, lr_0 = 2.2601e-04
Loss = 2.9129e-03, PNorm = 169.6699, GNorm = 0.0941, lr_0 = 2.2586e-04
Loss = 3.0718e-03, PNorm = 169.6748, GNorm = 0.0660, lr_0 = 2.2571e-04
Loss = 4.9829e-03, PNorm = 169.6808, GNorm = 0.3097, lr_0 = 2.2555e-04
Loss = 2.7553e-03, PNorm = 169.6876, GNorm = 0.2294, lr_0 = 2.2540e-04
Loss = 2.3436e-03, PNorm = 169.6941, GNorm = 0.2060, lr_0 = 2.2524e-04
Loss = 2.6472e-03, PNorm = 169.7012, GNorm = 0.0477, lr_0 = 2.2509e-04
Loss = 2.8502e-03, PNorm = 169.7076, GNorm = 0.2049, lr_0 = 2.2493e-04
Loss = 2.9091e-03, PNorm = 169.7135, GNorm = 0.2853, lr_0 = 2.2478e-04
Loss = 2.6827e-03, PNorm = 169.7200, GNorm = 0.3183, lr_0 = 2.2463e-04
Loss = 2.4612e-03, PNorm = 169.7227, GNorm = 0.2065, lr_0 = 2.2447e-04
Loss = 4.4216e-03, PNorm = 169.7289, GNorm = 0.2230, lr_0 = 2.2432e-04
Loss = 2.2692e-03, PNorm = 169.7347, GNorm = 0.1709, lr_0 = 2.2416e-04
Loss = 2.4509e-03, PNorm = 169.7421, GNorm = 0.1689, lr_0 = 2.2401e-04
Loss = 2.4082e-03, PNorm = 169.7452, GNorm = 0.1630, lr_0 = 2.2386e-04
Loss = 2.9289e-03, PNorm = 169.7478, GNorm = 0.0996, lr_0 = 2.2370e-04
Loss = 4.4203e-03, PNorm = 169.7511, GNorm = 0.1199, lr_0 = 2.2355e-04
Loss = 2.5490e-03, PNorm = 169.7583, GNorm = 0.2533, lr_0 = 2.2340e-04
Loss = 2.8730e-03, PNorm = 169.7649, GNorm = 0.1062, lr_0 = 2.2324e-04
Loss = 2.9647e-03, PNorm = 169.7708, GNorm = 0.3316, lr_0 = 2.2309e-04
Loss = 2.6367e-03, PNorm = 169.7774, GNorm = 0.1280, lr_0 = 2.2294e-04
Loss = 2.8129e-03, PNorm = 169.7834, GNorm = 0.1190, lr_0 = 2.2279e-04
Loss = 2.7129e-03, PNorm = 169.7910, GNorm = 0.1058, lr_0 = 2.2263e-04
Loss = 2.6989e-03, PNorm = 169.7970, GNorm = 0.2467, lr_0 = 2.2248e-04
Loss = 4.4352e-03, PNorm = 169.8032, GNorm = 0.1932, lr_0 = 2.2233e-04
Loss = 3.7412e-03, PNorm = 169.8098, GNorm = 0.6813, lr_0 = 2.2218e-04
Loss = 2.3749e-03, PNorm = 169.8145, GNorm = 0.2038, lr_0 = 2.2202e-04
Loss = 2.2458e-03, PNorm = 169.8203, GNorm = 0.1768, lr_0 = 2.2187e-04
Loss = 3.6251e-03, PNorm = 169.8219, GNorm = 0.3461, lr_0 = 2.2172e-04
Loss = 2.4038e-03, PNorm = 169.8273, GNorm = 0.2054, lr_0 = 2.2157e-04
Loss = 3.0235e-03, PNorm = 169.8339, GNorm = 0.1620, lr_0 = 2.2142e-04
Loss = 2.4760e-03, PNorm = 169.8410, GNorm = 0.1960, lr_0 = 2.2126e-04
Loss = 2.7837e-03, PNorm = 169.8453, GNorm = 0.1920, lr_0 = 2.2111e-04
Loss = 3.3335e-03, PNorm = 169.8489, GNorm = 0.1624, lr_0 = 2.2096e-04
Loss = 3.1566e-03, PNorm = 169.8542, GNorm = 0.1695, lr_0 = 2.2081e-04
Loss = 3.0082e-03, PNorm = 169.8589, GNorm = 0.0590, lr_0 = 2.2066e-04
Loss = 2.1232e-03, PNorm = 169.8637, GNorm = 0.0522, lr_0 = 2.2051e-04
Loss = 3.3766e-03, PNorm = 169.8702, GNorm = 0.0960, lr_0 = 2.2036e-04
Loss = 2.0831e-03, PNorm = 169.8765, GNorm = 0.1476, lr_0 = 2.2021e-04
Loss = 2.2986e-03, PNorm = 169.8807, GNorm = 0.0636, lr_0 = 2.2005e-04
Loss = 6.2340e-03, PNorm = 169.8860, GNorm = 0.2229, lr_0 = 2.1990e-04
Loss = 3.3756e-03, PNorm = 169.8898, GNorm = 0.1174, lr_0 = 2.1975e-04
Loss = 2.4615e-03, PNorm = 169.8948, GNorm = 0.2226, lr_0 = 2.1960e-04
Loss = 2.4532e-03, PNorm = 169.9001, GNorm = 0.1035, lr_0 = 2.1945e-04
Loss = 3.1846e-03, PNorm = 169.9056, GNorm = 0.3878, lr_0 = 2.1930e-04
Loss = 2.4428e-03, PNorm = 169.9113, GNorm = 0.0481, lr_0 = 2.1915e-04
Loss = 3.2016e-03, PNorm = 169.9169, GNorm = 0.0586, lr_0 = 2.1900e-04
Loss = 3.0618e-03, PNorm = 169.9217, GNorm = 0.2430, lr_0 = 2.1885e-04
Loss = 2.9731e-03, PNorm = 169.9279, GNorm = 0.0869, lr_0 = 2.1870e-04
Loss = 3.1549e-03, PNorm = 169.9330, GNorm = 0.3185, lr_0 = 2.1855e-04
Loss = 2.3913e-03, PNorm = 169.9377, GNorm = 0.1685, lr_0 = 2.1840e-04
Loss = 2.4079e-03, PNorm = 169.9448, GNorm = 0.1573, lr_0 = 2.1825e-04
Loss = 5.0221e-03, PNorm = 169.9518, GNorm = 0.1986, lr_0 = 2.1810e-04
Loss = 2.7147e-03, PNorm = 169.9599, GNorm = 0.1025, lr_0 = 2.1795e-04
Loss = 3.6046e-03, PNorm = 169.9677, GNorm = 0.0693, lr_0 = 2.1780e-04
Loss = 2.3792e-03, PNorm = 169.9738, GNorm = 0.1629, lr_0 = 2.1765e-04
Loss = 2.7974e-03, PNorm = 169.9783, GNorm = 0.1792, lr_0 = 2.1751e-04
Loss = 3.8099e-03, PNorm = 169.9834, GNorm = 0.1216, lr_0 = 2.1736e-04
Loss = 2.3350e-03, PNorm = 169.9864, GNorm = 0.1798, lr_0 = 2.1721e-04
Loss = 5.4741e-03, PNorm = 169.9933, GNorm = 0.1808, lr_0 = 2.1706e-04
Loss = 2.9845e-03, PNorm = 170.0002, GNorm = 0.0586, lr_0 = 2.1691e-04
Loss = 2.4525e-03, PNorm = 170.0031, GNorm = 0.1458, lr_0 = 2.1676e-04
Loss = 4.4989e-03, PNorm = 170.0045, GNorm = 0.1467, lr_0 = 2.1661e-04
Loss = 2.7492e-03, PNorm = 170.0090, GNorm = 0.1389, lr_0 = 2.1646e-04
Loss = 2.7168e-03, PNorm = 170.0133, GNorm = 0.3099, lr_0 = 2.1632e-04
Loss = 2.5293e-03, PNorm = 170.0206, GNorm = 0.1795, lr_0 = 2.1617e-04
Loss = 4.8491e-03, PNorm = 170.0250, GNorm = 0.1248, lr_0 = 2.1602e-04
Loss = 2.3121e-03, PNorm = 170.0297, GNorm = 0.3501, lr_0 = 2.1587e-04
Loss = 2.0534e-03, PNorm = 170.0349, GNorm = 0.0781, lr_0 = 2.1572e-04
Loss = 4.2775e-03, PNorm = 170.0419, GNorm = 0.0938, lr_0 = 2.1558e-04
Loss = 2.3483e-03, PNorm = 170.0479, GNorm = 0.0717, lr_0 = 2.1543e-04
Loss = 3.7444e-03, PNorm = 170.0516, GNorm = 0.2053, lr_0 = 2.1528e-04
Loss = 2.0544e-03, PNorm = 170.0570, GNorm = 0.1652, lr_0 = 2.1513e-04
Loss = 2.4505e-03, PNorm = 170.0625, GNorm = 0.1389, lr_0 = 2.1499e-04
Loss = 2.3840e-03, PNorm = 170.0690, GNorm = 0.1933, lr_0 = 2.1484e-04
Loss = 2.8581e-03, PNorm = 170.0749, GNorm = 0.2297, lr_0 = 2.1469e-04
Loss = 2.1119e-03, PNorm = 170.0802, GNorm = 0.1445, lr_0 = 2.1454e-04
Loss = 3.0527e-03, PNorm = 170.0857, GNorm = 0.1808, lr_0 = 2.1440e-04
Loss = 2.4317e-03, PNorm = 170.0892, GNorm = 0.1954, lr_0 = 2.1425e-04
Loss = 2.6182e-03, PNorm = 170.0941, GNorm = 0.0782, lr_0 = 2.1410e-04
Loss = 2.4278e-03, PNorm = 170.0998, GNorm = 0.1292, lr_0 = 2.1396e-04
Loss = 4.0204e-03, PNorm = 170.1054, GNorm = 0.0604, lr_0 = 2.1381e-04
Loss = 4.5320e-03, PNorm = 170.1099, GNorm = 0.1596, lr_0 = 2.1366e-04
Loss = 4.3329e-03, PNorm = 170.1151, GNorm = 0.1048, lr_0 = 2.1352e-04
Loss = 3.3520e-03, PNorm = 170.1212, GNorm = 0.2226, lr_0 = 2.1337e-04
Loss = 3.0281e-03, PNorm = 170.1263, GNorm = 0.4199, lr_0 = 2.1323e-04
Loss = 2.0261e-03, PNorm = 170.1316, GNorm = 0.1966, lr_0 = 2.1308e-04
Loss = 3.6373e-03, PNorm = 170.1365, GNorm = 0.1370, lr_0 = 2.1293e-04
Loss = 4.4708e-03, PNorm = 170.1432, GNorm = 0.2836, lr_0 = 2.1279e-04
Loss = 2.0814e-03, PNorm = 170.1487, GNorm = 0.1152, lr_0 = 2.1264e-04
Loss = 2.5882e-03, PNorm = 170.1527, GNorm = 0.1423, lr_0 = 2.1250e-04
Loss = 6.2147e-03, PNorm = 170.1561, GNorm = 0.2071, lr_0 = 2.1235e-04
Loss = 4.1156e-03, PNorm = 170.1583, GNorm = 0.2993, lr_0 = 2.1221e-04
Loss = 5.7254e-03, PNorm = 170.1629, GNorm = 0.0986, lr_0 = 2.1206e-04
Loss = 2.2831e-03, PNorm = 170.1695, GNorm = 0.1690, lr_0 = 2.1191e-04
Loss = 2.2698e-03, PNorm = 170.1743, GNorm = 0.2728, lr_0 = 2.1177e-04
Loss = 2.5476e-03, PNorm = 170.1804, GNorm = 0.0739, lr_0 = 2.1162e-04
Loss = 2.2005e-03, PNorm = 170.1848, GNorm = 0.1038, lr_0 = 2.1148e-04
Loss = 2.6540e-03, PNorm = 170.1899, GNorm = 0.1783, lr_0 = 2.1133e-04
Loss = 2.3889e-03, PNorm = 170.1956, GNorm = 0.1079, lr_0 = 2.1119e-04
Loss = 3.2135e-03, PNorm = 170.2019, GNorm = 0.1431, lr_0 = 2.1104e-04
Loss = 3.9379e-03, PNorm = 170.2079, GNorm = 0.1226, lr_0 = 2.1090e-04
Loss = 2.7486e-03, PNorm = 170.2141, GNorm = 0.1138, lr_0 = 2.1076e-04
Loss = 3.7903e-03, PNorm = 170.2180, GNorm = 0.4730, lr_0 = 2.1061e-04
Loss = 3.6140e-03, PNorm = 170.2218, GNorm = 0.0870, lr_0 = 2.1047e-04
Loss = 2.1805e-03, PNorm = 170.2263, GNorm = 0.1057, lr_0 = 2.1032e-04
Loss = 2.4088e-03, PNorm = 170.2311, GNorm = 0.1064, lr_0 = 2.1018e-04
Loss = 2.7439e-03, PNorm = 170.2367, GNorm = 0.1668, lr_0 = 2.1003e-04
Loss = 2.5799e-03, PNorm = 170.2432, GNorm = 0.1356, lr_0 = 2.0989e-04
Loss = 3.3706e-03, PNorm = 170.2491, GNorm = 0.0739, lr_0 = 2.0975e-04
Loss = 2.4808e-03, PNorm = 170.2546, GNorm = 0.1325, lr_0 = 2.0960e-04
Validation mae = 0.278406
Epoch 21
Loss = 3.1568e-03, PNorm = 170.2610, GNorm = 0.2828, lr_0 = 2.0946e-04
Loss = 1.9277e-03, PNorm = 170.2650, GNorm = 0.0763, lr_0 = 2.0932e-04
Loss = 2.4916e-03, PNorm = 170.2689, GNorm = 0.0908, lr_0 = 2.0917e-04
Loss = 3.5190e-03, PNorm = 170.2747, GNorm = 0.1380, lr_0 = 2.0903e-04
Loss = 2.2341e-03, PNorm = 170.2788, GNorm = 0.1306, lr_0 = 2.0889e-04
Loss = 4.2474e-03, PNorm = 170.2791, GNorm = 0.1644, lr_0 = 2.0874e-04
Loss = 3.2405e-03, PNorm = 170.2827, GNorm = 0.2500, lr_0 = 2.0860e-04
Loss = 2.1534e-03, PNorm = 170.2880, GNorm = 0.1925, lr_0 = 2.0846e-04
Loss = 2.7239e-03, PNorm = 170.2941, GNorm = 0.2250, lr_0 = 2.0831e-04
Loss = 2.6372e-03, PNorm = 170.2983, GNorm = 0.1742, lr_0 = 2.0817e-04
Loss = 2.6998e-03, PNorm = 170.3022, GNorm = 0.2301, lr_0 = 2.0803e-04
Loss = 3.3633e-03, PNorm = 170.3056, GNorm = 0.2420, lr_0 = 2.0789e-04
Loss = 2.3797e-03, PNorm = 170.3110, GNorm = 0.1971, lr_0 = 2.0774e-04
Loss = 2.7658e-03, PNorm = 170.3166, GNorm = 0.1781, lr_0 = 2.0760e-04
Loss = 3.6437e-03, PNorm = 170.3216, GNorm = 0.0575, lr_0 = 2.0746e-04
Loss = 2.2916e-03, PNorm = 170.3265, GNorm = 0.0716, lr_0 = 2.0732e-04
Loss = 1.9262e-03, PNorm = 170.3290, GNorm = 0.2116, lr_0 = 2.0718e-04
Loss = 2.6339e-03, PNorm = 170.3317, GNorm = 0.1469, lr_0 = 2.0703e-04
Loss = 2.9920e-03, PNorm = 170.3346, GNorm = 0.0678, lr_0 = 2.0689e-04
Loss = 2.0693e-03, PNorm = 170.3410, GNorm = 0.1963, lr_0 = 2.0675e-04
Loss = 1.9945e-03, PNorm = 170.3455, GNorm = 0.1269, lr_0 = 2.0661e-04
Loss = 1.9347e-03, PNorm = 170.3478, GNorm = 0.2145, lr_0 = 2.0647e-04
Loss = 4.4252e-03, PNorm = 170.3474, GNorm = 0.0703, lr_0 = 2.0633e-04
Loss = 2.9386e-03, PNorm = 170.3501, GNorm = 0.2799, lr_0 = 2.0618e-04
Loss = 2.0051e-03, PNorm = 170.3542, GNorm = 0.1938, lr_0 = 2.0604e-04
Loss = 2.0071e-03, PNorm = 170.3573, GNorm = 0.1624, lr_0 = 2.0590e-04
Loss = 3.7030e-03, PNorm = 170.3610, GNorm = 0.0921, lr_0 = 2.0576e-04
Loss = 3.5473e-03, PNorm = 170.3648, GNorm = 0.1277, lr_0 = 2.0562e-04
Loss = 2.7285e-03, PNorm = 170.3684, GNorm = 0.2074, lr_0 = 2.0548e-04
Loss = 3.2126e-03, PNorm = 170.3739, GNorm = 0.1205, lr_0 = 2.0534e-04
Loss = 2.1115e-03, PNorm = 170.3786, GNorm = 0.1018, lr_0 = 2.0520e-04
Loss = 5.8479e-03, PNorm = 170.3847, GNorm = 0.0961, lr_0 = 2.0506e-04
Loss = 2.2422e-03, PNorm = 170.3904, GNorm = 0.0583, lr_0 = 2.0492e-04
Loss = 2.7623e-03, PNorm = 170.3939, GNorm = 0.1787, lr_0 = 2.0478e-04
Loss = 2.6290e-03, PNorm = 170.3980, GNorm = 0.1726, lr_0 = 2.0464e-04
Loss = 1.8178e-03, PNorm = 170.4011, GNorm = 0.0507, lr_0 = 2.0450e-04
Loss = 3.1715e-03, PNorm = 170.4039, GNorm = 0.0760, lr_0 = 2.0436e-04
Loss = 2.9445e-03, PNorm = 170.4070, GNorm = 0.2043, lr_0 = 2.0422e-04
Loss = 2.5377e-03, PNorm = 170.4116, GNorm = 0.1588, lr_0 = 2.0408e-04
Loss = 1.7085e-03, PNorm = 170.4146, GNorm = 0.1867, lr_0 = 2.0394e-04
Loss = 2.9109e-03, PNorm = 170.4178, GNorm = 0.1145, lr_0 = 2.0380e-04
Loss = 3.5696e-03, PNorm = 170.4206, GNorm = 0.4541, lr_0 = 2.0366e-04
Loss = 3.8542e-03, PNorm = 170.4231, GNorm = 0.1441, lr_0 = 2.0352e-04
Loss = 2.4886e-03, PNorm = 170.4285, GNorm = 0.2582, lr_0 = 2.0338e-04
Loss = 2.7062e-03, PNorm = 170.4358, GNorm = 0.1187, lr_0 = 2.0324e-04
Loss = 3.0360e-03, PNorm = 170.4409, GNorm = 0.1479, lr_0 = 2.0310e-04
Loss = 1.8458e-03, PNorm = 170.4460, GNorm = 0.0726, lr_0 = 2.0296e-04
Loss = 1.4903e-03, PNorm = 170.4494, GNorm = 0.0953, lr_0 = 2.0282e-04
Loss = 2.2427e-03, PNorm = 170.4544, GNorm = 0.2549, lr_0 = 2.0268e-04
Loss = 2.0535e-03, PNorm = 170.4592, GNorm = 0.1594, lr_0 = 2.0254e-04
Loss = 2.5462e-03, PNorm = 170.4644, GNorm = 0.2061, lr_0 = 2.0240e-04
Loss = 3.5303e-03, PNorm = 170.4698, GNorm = 0.0478, lr_0 = 2.0227e-04
Loss = 3.2619e-03, PNorm = 170.4735, GNorm = 0.3300, lr_0 = 2.0213e-04
Loss = 1.6371e-03, PNorm = 170.4793, GNorm = 0.1178, lr_0 = 2.0199e-04
Loss = 2.4380e-03, PNorm = 170.4830, GNorm = 0.0780, lr_0 = 2.0185e-04
Loss = 1.8019e-03, PNorm = 170.4869, GNorm = 0.1240, lr_0 = 2.0171e-04
Loss = 1.9658e-03, PNorm = 170.4887, GNorm = 0.0785, lr_0 = 2.0157e-04
Loss = 3.2428e-03, PNorm = 170.4903, GNorm = 0.3072, lr_0 = 2.0144e-04
Loss = 3.5489e-03, PNorm = 170.4933, GNorm = 0.0928, lr_0 = 2.0130e-04
Loss = 1.8282e-03, PNorm = 170.4991, GNorm = 0.0925, lr_0 = 2.0116e-04
Loss = 2.1074e-03, PNorm = 170.5046, GNorm = 0.1177, lr_0 = 2.0102e-04
Loss = 1.8075e-03, PNorm = 170.5086, GNorm = 0.1139, lr_0 = 2.0088e-04
Loss = 3.2557e-03, PNorm = 170.5122, GNorm = 0.1039, lr_0 = 2.0075e-04
Loss = 3.0285e-03, PNorm = 170.5163, GNorm = 0.0951, lr_0 = 2.0061e-04
Loss = 1.6793e-03, PNorm = 170.5207, GNorm = 0.1284, lr_0 = 2.0047e-04
Loss = 2.4944e-03, PNorm = 170.5244, GNorm = 0.2342, lr_0 = 2.0033e-04
Loss = 2.2276e-03, PNorm = 170.5305, GNorm = 0.1744, lr_0 = 2.0020e-04
Loss = 4.2447e-03, PNorm = 170.5338, GNorm = 0.1153, lr_0 = 2.0006e-04
Loss = 2.1933e-03, PNorm = 170.5374, GNorm = 0.1335, lr_0 = 1.9992e-04
Loss = 4.2624e-03, PNorm = 170.5400, GNorm = 0.1346, lr_0 = 1.9979e-04
Loss = 2.2968e-03, PNorm = 170.5438, GNorm = 0.1332, lr_0 = 1.9965e-04
Loss = 2.2214e-03, PNorm = 170.5474, GNorm = 0.0681, lr_0 = 1.9951e-04
Loss = 2.0499e-03, PNorm = 170.5525, GNorm = 0.0591, lr_0 = 1.9938e-04
Loss = 2.4682e-03, PNorm = 170.5561, GNorm = 0.3243, lr_0 = 1.9924e-04
Loss = 3.0892e-03, PNorm = 170.5593, GNorm = 0.1223, lr_0 = 1.9910e-04
Loss = 2.8986e-03, PNorm = 170.5636, GNorm = 0.1938, lr_0 = 1.9897e-04
Loss = 1.8795e-03, PNorm = 170.5685, GNorm = 0.2797, lr_0 = 1.9883e-04
Loss = 2.3610e-03, PNorm = 170.5727, GNorm = 0.0760, lr_0 = 1.9869e-04
Loss = 2.3024e-03, PNorm = 170.5788, GNorm = 0.1613, lr_0 = 1.9856e-04
Loss = 3.9926e-03, PNorm = 170.5834, GNorm = 0.3397, lr_0 = 1.9842e-04
Loss = 1.9807e-03, PNorm = 170.5900, GNorm = 0.1778, lr_0 = 1.9829e-04
Loss = 2.4167e-03, PNorm = 170.5950, GNorm = 0.0502, lr_0 = 1.9815e-04
Loss = 2.6037e-03, PNorm = 170.5991, GNorm = 0.0761, lr_0 = 1.9801e-04
Loss = 2.1041e-03, PNorm = 170.6022, GNorm = 0.3886, lr_0 = 1.9788e-04
Loss = 2.7052e-03, PNorm = 170.6048, GNorm = 0.1333, lr_0 = 1.9774e-04
Loss = 2.4141e-03, PNorm = 170.6088, GNorm = 0.2326, lr_0 = 1.9761e-04
Loss = 3.1409e-03, PNorm = 170.6127, GNorm = 0.2750, lr_0 = 1.9747e-04
Loss = 2.1284e-03, PNorm = 170.6191, GNorm = 0.0868, lr_0 = 1.9734e-04
Loss = 2.8227e-03, PNorm = 170.6242, GNorm = 0.0947, lr_0 = 1.9720e-04
Loss = 2.5248e-03, PNorm = 170.6298, GNorm = 0.1503, lr_0 = 1.9707e-04
Loss = 1.7141e-03, PNorm = 170.6352, GNorm = 0.2408, lr_0 = 1.9693e-04
Loss = 3.1554e-03, PNorm = 170.6388, GNorm = 0.1427, lr_0 = 1.9680e-04
Loss = 1.7241e-03, PNorm = 170.6442, GNorm = 0.2437, lr_0 = 1.9666e-04
Loss = 3.5556e-03, PNorm = 170.6484, GNorm = 0.0469, lr_0 = 1.9653e-04
Loss = 2.3560e-03, PNorm = 170.6529, GNorm = 0.1429, lr_0 = 1.9639e-04
Loss = 1.8051e-03, PNorm = 170.6580, GNorm = 0.2719, lr_0 = 1.9626e-04
Loss = 1.7179e-03, PNorm = 170.6613, GNorm = 0.1917, lr_0 = 1.9612e-04
Loss = 3.1986e-03, PNorm = 170.6635, GNorm = 0.1658, lr_0 = 1.9599e-04
Loss = 2.9585e-03, PNorm = 170.6683, GNorm = 0.1024, lr_0 = 1.9585e-04
Loss = 6.3806e-03, PNorm = 170.6724, GNorm = 0.0569, lr_0 = 1.9572e-04
Loss = 4.3203e-03, PNorm = 170.6777, GNorm = 0.0545, lr_0 = 1.9559e-04
Loss = 2.3574e-03, PNorm = 170.6817, GNorm = 0.1794, lr_0 = 1.9545e-04
Loss = 1.8179e-03, PNorm = 170.6861, GNorm = 0.1156, lr_0 = 1.9532e-04
Loss = 2.2072e-03, PNorm = 170.6917, GNorm = 0.1914, lr_0 = 1.9518e-04
Loss = 1.7716e-03, PNorm = 170.6956, GNorm = 0.2509, lr_0 = 1.9505e-04
Loss = 2.6296e-03, PNorm = 170.6974, GNorm = 0.0747, lr_0 = 1.9492e-04
Loss = 2.4900e-03, PNorm = 170.6990, GNorm = 0.1629, lr_0 = 1.9478e-04
Loss = 1.8996e-03, PNorm = 170.7015, GNorm = 0.0918, lr_0 = 1.9465e-04
Loss = 2.6258e-03, PNorm = 170.7055, GNorm = 0.2184, lr_0 = 1.9452e-04
Loss = 2.7291e-03, PNorm = 170.7122, GNorm = 0.1424, lr_0 = 1.9438e-04
Loss = 1.9881e-03, PNorm = 170.7184, GNorm = 0.1055, lr_0 = 1.9425e-04
Loss = 3.5048e-03, PNorm = 170.7244, GNorm = 0.0788, lr_0 = 1.9412e-04
Loss = 3.4480e-03, PNorm = 170.7272, GNorm = 0.4120, lr_0 = 1.9398e-04
Loss = 2.1310e-03, PNorm = 170.7313, GNorm = 0.0855, lr_0 = 1.9385e-04
Loss = 3.4075e-03, PNorm = 170.7364, GNorm = 0.2061, lr_0 = 1.9372e-04
Loss = 2.0849e-03, PNorm = 170.7429, GNorm = 0.1161, lr_0 = 1.9359e-04
Loss = 2.4761e-03, PNorm = 170.7489, GNorm = 0.0690, lr_0 = 1.9345e-04
Loss = 1.7289e-03, PNorm = 170.7559, GNorm = 0.0860, lr_0 = 1.9332e-04
Loss = 2.6462e-03, PNorm = 170.7609, GNorm = 0.0588, lr_0 = 1.9319e-04
Loss = 3.1917e-03, PNorm = 170.7651, GNorm = 0.0648, lr_0 = 1.9306e-04
Validation mae = 0.278515
Epoch 22
Loss = 2.2502e-03, PNorm = 170.7670, GNorm = 0.0401, lr_0 = 1.9292e-04
Loss = 1.7614e-03, PNorm = 170.7699, GNorm = 0.0824, lr_0 = 1.9279e-04
Loss = 1.7228e-03, PNorm = 170.7734, GNorm = 0.1041, lr_0 = 1.9266e-04
Loss = 2.8725e-03, PNorm = 170.7770, GNorm = 0.1309, lr_0 = 1.9253e-04
Loss = 3.4326e-03, PNorm = 170.7793, GNorm = 0.3376, lr_0 = 1.9240e-04
Loss = 3.4286e-03, PNorm = 170.7834, GNorm = 0.1605, lr_0 = 1.9226e-04
Loss = 2.7027e-03, PNorm = 170.7876, GNorm = 0.1596, lr_0 = 1.9213e-04
Loss = 2.0346e-03, PNorm = 170.7907, GNorm = 0.1423, lr_0 = 1.9200e-04
Loss = 5.0156e-03, PNorm = 170.7947, GNorm = 0.3364, lr_0 = 1.9187e-04
Loss = 2.3281e-03, PNorm = 170.7975, GNorm = 0.2567, lr_0 = 1.9174e-04
Loss = 3.1418e-03, PNorm = 170.8013, GNorm = 0.1653, lr_0 = 1.9161e-04
Loss = 2.6872e-03, PNorm = 170.8061, GNorm = 0.1146, lr_0 = 1.9148e-04
Loss = 2.1932e-03, PNorm = 170.8087, GNorm = 0.2014, lr_0 = 1.9134e-04
Loss = 2.1949e-03, PNorm = 170.8109, GNorm = 0.1610, lr_0 = 1.9121e-04
Loss = 4.2147e-03, PNorm = 170.8131, GNorm = 0.0616, lr_0 = 1.9108e-04
Loss = 2.3486e-03, PNorm = 170.8191, GNorm = 0.1923, lr_0 = 1.9095e-04
Loss = 2.1509e-03, PNorm = 170.8248, GNorm = 0.2228, lr_0 = 1.9082e-04
Loss = 2.3492e-03, PNorm = 170.8305, GNorm = 0.2653, lr_0 = 1.9069e-04
Loss = 1.9719e-03, PNorm = 170.8349, GNorm = 0.2057, lr_0 = 1.9056e-04
Loss = 2.2674e-03, PNorm = 170.8403, GNorm = 0.2244, lr_0 = 1.9043e-04
Loss = 2.6740e-03, PNorm = 170.8464, GNorm = 0.0666, lr_0 = 1.9030e-04
Loss = 2.0934e-03, PNorm = 170.8508, GNorm = 0.2492, lr_0 = 1.9017e-04
Loss = 2.2558e-03, PNorm = 170.8540, GNorm = 0.0378, lr_0 = 1.9004e-04
Loss = 1.7109e-03, PNorm = 170.8560, GNorm = 0.0970, lr_0 = 1.8991e-04
Loss = 1.8926e-03, PNorm = 170.8579, GNorm = 0.0823, lr_0 = 1.8978e-04
Loss = 2.1322e-03, PNorm = 170.8605, GNorm = 0.0555, lr_0 = 1.8965e-04
Loss = 1.7740e-03, PNorm = 170.8647, GNorm = 0.0950, lr_0 = 1.8952e-04
Loss = 3.5876e-03, PNorm = 170.8710, GNorm = 0.2483, lr_0 = 1.8939e-04
Loss = 2.2973e-03, PNorm = 170.8763, GNorm = 0.1110, lr_0 = 1.8926e-04
Loss = 2.0764e-03, PNorm = 170.8797, GNorm = 0.1387, lr_0 = 1.8913e-04
Loss = 2.3589e-03, PNorm = 170.8822, GNorm = 0.1472, lr_0 = 1.8900e-04
Loss = 1.5641e-03, PNorm = 170.8843, GNorm = 0.0479, lr_0 = 1.8887e-04
Loss = 4.0548e-03, PNorm = 170.8868, GNorm = 0.0650, lr_0 = 1.8874e-04
Loss = 2.4734e-03, PNorm = 170.8908, GNorm = 0.1510, lr_0 = 1.8861e-04
Loss = 1.8888e-03, PNorm = 170.8948, GNorm = 0.1523, lr_0 = 1.8848e-04
Loss = 1.9124e-03, PNorm = 170.9001, GNorm = 0.1877, lr_0 = 1.8835e-04
Loss = 1.7910e-03, PNorm = 170.9043, GNorm = 0.0371, lr_0 = 1.8822e-04
Loss = 2.1908e-03, PNorm = 170.9069, GNorm = 0.0988, lr_0 = 1.8809e-04
Loss = 1.6647e-03, PNorm = 170.9102, GNorm = 0.1285, lr_0 = 1.8797e-04
Loss = 3.1148e-03, PNorm = 170.9124, GNorm = 0.0837, lr_0 = 1.8784e-04
Loss = 1.7909e-03, PNorm = 170.9152, GNorm = 0.0813, lr_0 = 1.8771e-04
Loss = 2.1141e-03, PNorm = 170.9169, GNorm = 0.0798, lr_0 = 1.8758e-04
Loss = 1.4947e-03, PNorm = 170.9199, GNorm = 0.1314, lr_0 = 1.8745e-04
Loss = 2.2129e-03, PNorm = 170.9225, GNorm = 0.0544, lr_0 = 1.8732e-04
Loss = 1.6589e-03, PNorm = 170.9262, GNorm = 0.1423, lr_0 = 1.8719e-04
Loss = 1.9369e-03, PNorm = 170.9274, GNorm = 0.1336, lr_0 = 1.8707e-04
Loss = 1.9484e-03, PNorm = 170.9312, GNorm = 0.1089, lr_0 = 1.8694e-04
Loss = 1.7968e-03, PNorm = 170.9356, GNorm = 0.0461, lr_0 = 1.8681e-04
Loss = 1.5824e-03, PNorm = 170.9407, GNorm = 0.0925, lr_0 = 1.8668e-04
Loss = 2.2253e-03, PNorm = 170.9455, GNorm = 0.0838, lr_0 = 1.8655e-04
Loss = 2.1037e-03, PNorm = 170.9510, GNorm = 0.1558, lr_0 = 1.8643e-04
Loss = 1.4503e-03, PNorm = 170.9536, GNorm = 0.0842, lr_0 = 1.8630e-04
Loss = 1.6113e-03, PNorm = 170.9562, GNorm = 0.0866, lr_0 = 1.8617e-04
Loss = 1.8178e-03, PNorm = 170.9584, GNorm = 0.1041, lr_0 = 1.8604e-04
Loss = 2.0522e-03, PNorm = 170.9621, GNorm = 0.2099, lr_0 = 1.8592e-04
Loss = 2.5752e-03, PNorm = 170.9647, GNorm = 0.2104, lr_0 = 1.8579e-04
Loss = 5.2663e-03, PNorm = 170.9684, GNorm = 0.1151, lr_0 = 1.8566e-04
Loss = 1.6908e-03, PNorm = 170.9726, GNorm = 0.0876, lr_0 = 1.8553e-04
Loss = 2.5048e-03, PNorm = 170.9764, GNorm = 0.0510, lr_0 = 1.8541e-04
Loss = 1.7144e-03, PNorm = 170.9793, GNorm = 0.0514, lr_0 = 1.8528e-04
Loss = 3.9088e-03, PNorm = 170.9844, GNorm = 0.0878, lr_0 = 1.8515e-04
Loss = 2.2864e-03, PNorm = 170.9884, GNorm = 0.1278, lr_0 = 1.8503e-04
Loss = 3.1913e-03, PNorm = 170.9933, GNorm = 0.3010, lr_0 = 1.8490e-04
Loss = 2.4920e-03, PNorm = 170.9972, GNorm = 0.0996, lr_0 = 1.8477e-04
Loss = 4.8537e-03, PNorm = 171.0019, GNorm = 0.0486, lr_0 = 1.8465e-04
Loss = 1.6161e-03, PNorm = 171.0065, GNorm = 0.1244, lr_0 = 1.8452e-04
Loss = 2.2810e-03, PNorm = 171.0122, GNorm = 0.1000, lr_0 = 1.8439e-04
Loss = 2.1844e-03, PNorm = 171.0191, GNorm = 0.2829, lr_0 = 1.8427e-04
Loss = 2.3005e-03, PNorm = 171.0231, GNorm = 0.1226, lr_0 = 1.8414e-04
Loss = 2.4751e-03, PNorm = 171.0262, GNorm = 0.1119, lr_0 = 1.8401e-04
Loss = 1.5636e-03, PNorm = 171.0292, GNorm = 0.1453, lr_0 = 1.8389e-04
Loss = 1.9665e-03, PNorm = 171.0327, GNorm = 0.0989, lr_0 = 1.8376e-04
Loss = 2.4677e-03, PNorm = 171.0356, GNorm = 0.1801, lr_0 = 1.8364e-04
Loss = 2.1960e-03, PNorm = 171.0400, GNorm = 0.2154, lr_0 = 1.8351e-04
Loss = 1.6360e-03, PNorm = 171.0447, GNorm = 0.0397, lr_0 = 1.8338e-04
Loss = 2.9720e-03, PNorm = 171.0481, GNorm = 0.2444, lr_0 = 1.8326e-04
Loss = 3.8973e-03, PNorm = 171.0510, GNorm = 0.1882, lr_0 = 1.8313e-04
Loss = 1.6213e-03, PNorm = 171.0536, GNorm = 0.1760, lr_0 = 1.8301e-04
Loss = 3.7125e-03, PNorm = 171.0570, GNorm = 0.1476, lr_0 = 1.8288e-04
Loss = 1.9454e-03, PNorm = 171.0592, GNorm = 0.0770, lr_0 = 1.8276e-04
Loss = 2.3417e-03, PNorm = 171.0621, GNorm = 0.0967, lr_0 = 1.8263e-04
Loss = 1.7744e-03, PNorm = 171.0663, GNorm = 0.0399, lr_0 = 1.8251e-04
Loss = 1.6005e-03, PNorm = 171.0704, GNorm = 0.1310, lr_0 = 1.8238e-04
Loss = 1.5980e-03, PNorm = 171.0735, GNorm = 0.1205, lr_0 = 1.8226e-04
Loss = 3.5385e-03, PNorm = 171.0759, GNorm = 0.1668, lr_0 = 1.8213e-04
Loss = 2.8978e-03, PNorm = 171.0792, GNorm = 0.2419, lr_0 = 1.8201e-04
Loss = 4.7267e-03, PNorm = 171.0830, GNorm = 0.1456, lr_0 = 1.8188e-04
Loss = 2.4700e-03, PNorm = 171.0868, GNorm = 0.0970, lr_0 = 1.8176e-04
Loss = 2.1371e-03, PNorm = 171.0912, GNorm = 0.1051, lr_0 = 1.8163e-04
Loss = 2.5345e-03, PNorm = 171.0962, GNorm = 0.1798, lr_0 = 1.8151e-04
Loss = 1.6902e-03, PNorm = 171.1005, GNorm = 0.1924, lr_0 = 1.8138e-04
Loss = 1.4329e-03, PNorm = 171.1045, GNorm = 0.0771, lr_0 = 1.8126e-04
Loss = 2.5328e-03, PNorm = 171.1086, GNorm = 0.0717, lr_0 = 1.8114e-04
Loss = 2.8119e-03, PNorm = 171.1123, GNorm = 0.0960, lr_0 = 1.8101e-04
Loss = 3.3108e-03, PNorm = 171.1166, GNorm = 0.2232, lr_0 = 1.8089e-04
Loss = 1.4499e-03, PNorm = 171.1219, GNorm = 0.1740, lr_0 = 1.8076e-04
Loss = 1.9093e-03, PNorm = 171.1254, GNorm = 0.1428, lr_0 = 1.8064e-04
Loss = 3.0920e-03, PNorm = 171.1290, GNorm = 0.2944, lr_0 = 1.8052e-04
Loss = 2.0416e-03, PNorm = 171.1331, GNorm = 0.2883, lr_0 = 1.8039e-04
Loss = 1.6217e-03, PNorm = 171.1380, GNorm = 0.1668, lr_0 = 1.8027e-04
Loss = 3.0994e-03, PNorm = 171.1410, GNorm = 0.2220, lr_0 = 1.8015e-04
Loss = 2.2592e-03, PNorm = 171.1436, GNorm = 0.0686, lr_0 = 1.8002e-04
Loss = 1.9502e-03, PNorm = 171.1488, GNorm = 0.1732, lr_0 = 1.7990e-04
Loss = 2.1577e-03, PNorm = 171.1514, GNorm = 0.3242, lr_0 = 1.7978e-04
Loss = 1.7397e-03, PNorm = 171.1558, GNorm = 0.1932, lr_0 = 1.7965e-04
Loss = 2.0177e-03, PNorm = 171.1598, GNorm = 0.0470, lr_0 = 1.7953e-04
Loss = 2.5989e-03, PNorm = 171.1638, GNorm = 0.1827, lr_0 = 1.7941e-04
Loss = 4.3680e-03, PNorm = 171.1686, GNorm = 0.2227, lr_0 = 1.7928e-04
Loss = 2.2954e-03, PNorm = 171.1739, GNorm = 0.1852, lr_0 = 1.7916e-04
Loss = 1.8805e-03, PNorm = 171.1759, GNorm = 0.0786, lr_0 = 1.7904e-04
Loss = 1.5402e-03, PNorm = 171.1783, GNorm = 0.1995, lr_0 = 1.7892e-04
Loss = 2.1258e-03, PNorm = 171.1817, GNorm = 0.1982, lr_0 = 1.7879e-04
Loss = 2.7010e-03, PNorm = 171.1832, GNorm = 0.1153, lr_0 = 1.7867e-04
Loss = 2.1060e-03, PNorm = 171.1850, GNorm = 0.1658, lr_0 = 1.7855e-04
Loss = 1.7117e-03, PNorm = 171.1871, GNorm = 0.2460, lr_0 = 1.7843e-04
Loss = 3.1596e-03, PNorm = 171.1905, GNorm = 0.3334, lr_0 = 1.7830e-04
Loss = 3.5608e-03, PNorm = 171.1959, GNorm = 0.1444, lr_0 = 1.7818e-04
Loss = 1.8266e-03, PNorm = 171.2012, GNorm = 0.1239, lr_0 = 1.7806e-04
Loss = 2.0241e-03, PNorm = 171.2060, GNorm = 0.1842, lr_0 = 1.7794e-04
Loss = 1.6807e-03, PNorm = 171.2119, GNorm = 0.1382, lr_0 = 1.7782e-04
Validation mae = 0.278176
Epoch 23
Loss = 1.6596e-03, PNorm = 171.2147, GNorm = 0.0975, lr_0 = 1.7769e-04
Loss = 1.4484e-03, PNorm = 171.2164, GNorm = 0.1361, lr_0 = 1.7757e-04
Loss = 1.7275e-03, PNorm = 171.2183, GNorm = 0.0457, lr_0 = 1.7745e-04
Loss = 4.0473e-03, PNorm = 171.2195, GNorm = 0.0801, lr_0 = 1.7733e-04
Loss = 1.7241e-03, PNorm = 171.2206, GNorm = 0.1398, lr_0 = 1.7721e-04
Loss = 2.4984e-03, PNorm = 171.2246, GNorm = 0.1127, lr_0 = 1.7709e-04
Loss = 1.8277e-03, PNorm = 171.2270, GNorm = 0.1913, lr_0 = 1.7696e-04
Loss = 1.4860e-03, PNorm = 171.2310, GNorm = 0.2435, lr_0 = 1.7684e-04
Loss = 2.0876e-03, PNorm = 171.2344, GNorm = 0.1473, lr_0 = 1.7672e-04
Loss = 2.2381e-03, PNorm = 171.2384, GNorm = 0.1393, lr_0 = 1.7660e-04
Loss = 2.1562e-03, PNorm = 171.2407, GNorm = 0.1892, lr_0 = 1.7648e-04
Loss = 2.9648e-03, PNorm = 171.2423, GNorm = 0.1935, lr_0 = 1.7636e-04
Loss = 1.4128e-03, PNorm = 171.2454, GNorm = 0.1775, lr_0 = 1.7624e-04
Loss = 3.1045e-03, PNorm = 171.2482, GNorm = 0.1326, lr_0 = 1.7612e-04
Loss = 2.3649e-03, PNorm = 171.2517, GNorm = 0.2501, lr_0 = 1.7600e-04
Loss = 1.6432e-03, PNorm = 171.2550, GNorm = 0.1753, lr_0 = 1.7588e-04
Loss = 3.3363e-03, PNorm = 171.2593, GNorm = 0.0624, lr_0 = 1.7576e-04
Loss = 1.3666e-03, PNorm = 171.2638, GNorm = 0.0983, lr_0 = 1.7564e-04
Loss = 1.5943e-03, PNorm = 171.2663, GNorm = 0.1954, lr_0 = 1.7552e-04
Loss = 1.5008e-03, PNorm = 171.2689, GNorm = 0.1528, lr_0 = 1.7540e-04
Loss = 2.8371e-03, PNorm = 171.2714, GNorm = 0.0846, lr_0 = 1.7528e-04
Loss = 1.7266e-03, PNorm = 171.2742, GNorm = 0.1300, lr_0 = 1.7516e-04
Loss = 1.9907e-03, PNorm = 171.2775, GNorm = 0.2014, lr_0 = 1.7504e-04
Loss = 2.1773e-03, PNorm = 171.2807, GNorm = 0.0811, lr_0 = 1.7492e-04
Loss = 2.4652e-03, PNorm = 171.2835, GNorm = 0.2130, lr_0 = 1.7480e-04
Loss = 1.4005e-03, PNorm = 171.2873, GNorm = 0.1134, lr_0 = 1.7468e-04
Loss = 1.1813e-03, PNorm = 171.2899, GNorm = 0.1539, lr_0 = 1.7456e-04
Loss = 3.2134e-03, PNorm = 171.2930, GNorm = 0.0988, lr_0 = 1.7444e-04
Loss = 1.8251e-03, PNorm = 171.2972, GNorm = 0.1300, lr_0 = 1.7432e-04
Loss = 1.6896e-03, PNorm = 171.2998, GNorm = 0.2026, lr_0 = 1.7420e-04
Loss = 1.6427e-03, PNorm = 171.3018, GNorm = 0.0607, lr_0 = 1.7408e-04
Loss = 2.5722e-03, PNorm = 171.3036, GNorm = 0.1027, lr_0 = 1.7396e-04
Loss = 1.6191e-03, PNorm = 171.3066, GNorm = 0.1622, lr_0 = 1.7384e-04
Loss = 3.4212e-03, PNorm = 171.3105, GNorm = 0.0808, lr_0 = 1.7372e-04
Loss = 2.0958e-03, PNorm = 171.3140, GNorm = 0.2795, lr_0 = 1.7360e-04
Loss = 1.4319e-03, PNorm = 171.3163, GNorm = 0.0755, lr_0 = 1.7348e-04
Loss = 1.8331e-03, PNorm = 171.3194, GNorm = 0.4462, lr_0 = 1.7336e-04
Loss = 1.2575e-03, PNorm = 171.3233, GNorm = 0.0627, lr_0 = 1.7325e-04
Loss = 3.9071e-03, PNorm = 171.3259, GNorm = 0.2081, lr_0 = 1.7313e-04
Loss = 1.9587e-03, PNorm = 171.3283, GNorm = 0.1283, lr_0 = 1.7301e-04
Loss = 1.7245e-03, PNorm = 171.3322, GNorm = 0.4046, lr_0 = 1.7289e-04
Loss = 1.5859e-03, PNorm = 171.3363, GNorm = 0.2130, lr_0 = 1.7277e-04
Loss = 1.9900e-03, PNorm = 171.3402, GNorm = 0.1417, lr_0 = 1.7265e-04
Loss = 1.7370e-03, PNorm = 171.3434, GNorm = 0.1480, lr_0 = 1.7253e-04
Loss = 1.3852e-03, PNorm = 171.3462, GNorm = 0.0513, lr_0 = 1.7242e-04
Loss = 2.6373e-03, PNorm = 171.3498, GNorm = 0.0965, lr_0 = 1.7230e-04
Loss = 1.5580e-03, PNorm = 171.3529, GNorm = 0.1188, lr_0 = 1.7218e-04
Loss = 1.6475e-03, PNorm = 171.3557, GNorm = 0.0541, lr_0 = 1.7206e-04
Loss = 1.9503e-03, PNorm = 171.3574, GNorm = 0.0689, lr_0 = 1.7194e-04
Loss = 1.3967e-03, PNorm = 171.3611, GNorm = 0.1268, lr_0 = 1.7183e-04
Loss = 2.1435e-03, PNorm = 171.3644, GNorm = 0.1209, lr_0 = 1.7171e-04
Loss = 2.4542e-03, PNorm = 171.3671, GNorm = 0.0880, lr_0 = 1.7159e-04
Loss = 2.6036e-03, PNorm = 171.3711, GNorm = 0.1329, lr_0 = 1.7147e-04
Loss = 4.6720e-03, PNorm = 171.3739, GNorm = 0.2389, lr_0 = 1.7136e-04
Loss = 2.4494e-03, PNorm = 171.3768, GNorm = 0.0655, lr_0 = 1.7124e-04
Loss = 2.1391e-03, PNorm = 171.3812, GNorm = 0.1241, lr_0 = 1.7112e-04
Loss = 2.9772e-03, PNorm = 171.3856, GNorm = 0.0682, lr_0 = 1.7100e-04
Loss = 2.9802e-03, PNorm = 171.3893, GNorm = 0.1655, lr_0 = 1.7089e-04
Loss = 1.5698e-03, PNorm = 171.3917, GNorm = 0.1641, lr_0 = 1.7077e-04
Loss = 1.5909e-03, PNorm = 171.3959, GNorm = 0.0612, lr_0 = 1.7065e-04
Loss = 2.1316e-03, PNorm = 171.3999, GNorm = 0.1410, lr_0 = 1.7054e-04
Loss = 1.6230e-03, PNorm = 171.4024, GNorm = 0.0818, lr_0 = 1.7042e-04
Loss = 2.5274e-03, PNorm = 171.4045, GNorm = 0.1115, lr_0 = 1.7030e-04
Loss = 1.8638e-03, PNorm = 171.4078, GNorm = 0.0806, lr_0 = 1.7019e-04
Loss = 3.2733e-03, PNorm = 171.4111, GNorm = 0.1451, lr_0 = 1.7007e-04
Loss = 3.3228e-03, PNorm = 171.4136, GNorm = 0.3022, lr_0 = 1.6995e-04
Loss = 1.8249e-03, PNorm = 171.4172, GNorm = 0.1385, lr_0 = 1.6984e-04
Loss = 1.9063e-03, PNorm = 171.4217, GNorm = 0.1878, lr_0 = 1.6972e-04
Loss = 2.1332e-03, PNorm = 171.4258, GNorm = 0.2969, lr_0 = 1.6960e-04
Loss = 2.0803e-03, PNorm = 171.4289, GNorm = 0.0706, lr_0 = 1.6949e-04
Loss = 1.7902e-03, PNorm = 171.4300, GNorm = 0.3144, lr_0 = 1.6937e-04
Loss = 1.3803e-03, PNorm = 171.4330, GNorm = 0.0957, lr_0 = 1.6926e-04
Loss = 2.7239e-03, PNorm = 171.4347, GNorm = 0.5231, lr_0 = 1.6914e-04
Loss = 2.1284e-03, PNorm = 171.4371, GNorm = 0.3338, lr_0 = 1.6902e-04
Loss = 3.2926e-03, PNorm = 171.4406, GNorm = 0.0495, lr_0 = 1.6891e-04
Loss = 1.6167e-03, PNorm = 171.4456, GNorm = 0.1950, lr_0 = 1.6879e-04
Loss = 1.6439e-03, PNorm = 171.4497, GNorm = 0.1776, lr_0 = 1.6868e-04
Loss = 1.2654e-03, PNorm = 171.4534, GNorm = 0.2357, lr_0 = 1.6856e-04
Loss = 1.5400e-03, PNorm = 171.4571, GNorm = 0.0698, lr_0 = 1.6845e-04
Loss = 5.7146e-03, PNorm = 171.4606, GNorm = 0.0778, lr_0 = 1.6833e-04
Loss = 1.8041e-03, PNorm = 171.4656, GNorm = 0.3927, lr_0 = 1.6821e-04
Loss = 1.7460e-03, PNorm = 171.4701, GNorm = 0.0995, lr_0 = 1.6810e-04
Loss = 1.9011e-03, PNorm = 171.4724, GNorm = 0.0800, lr_0 = 1.6798e-04
Loss = 2.8658e-03, PNorm = 171.4742, GNorm = 0.1651, lr_0 = 1.6787e-04
Loss = 1.7976e-03, PNorm = 171.4765, GNorm = 0.0390, lr_0 = 1.6775e-04
Loss = 1.6016e-03, PNorm = 171.4794, GNorm = 0.0612, lr_0 = 1.6764e-04
Loss = 1.3437e-03, PNorm = 171.4822, GNorm = 0.1844, lr_0 = 1.6752e-04
Loss = 2.5185e-03, PNorm = 171.4850, GNorm = 0.4020, lr_0 = 1.6741e-04
Loss = 1.5728e-03, PNorm = 171.4890, GNorm = 0.1548, lr_0 = 1.6729e-04
Loss = 2.6990e-03, PNorm = 171.4922, GNorm = 0.1842, lr_0 = 1.6718e-04
Loss = 1.4676e-03, PNorm = 171.4957, GNorm = 0.0452, lr_0 = 1.6707e-04
Loss = 2.4041e-03, PNorm = 171.4978, GNorm = 0.0947, lr_0 = 1.6695e-04
Loss = 2.2665e-03, PNorm = 171.5009, GNorm = 0.0818, lr_0 = 1.6684e-04
Loss = 2.3400e-03, PNorm = 171.5024, GNorm = 0.1261, lr_0 = 1.6672e-04
Loss = 2.3147e-03, PNorm = 171.5043, GNorm = 0.2065, lr_0 = 1.6661e-04
Loss = 2.7132e-03, PNorm = 171.5084, GNorm = 0.0786, lr_0 = 1.6649e-04
Loss = 1.5633e-03, PNorm = 171.5115, GNorm = 0.0766, lr_0 = 1.6638e-04
Loss = 2.0011e-03, PNorm = 171.5146, GNorm = 0.2793, lr_0 = 1.6627e-04
Loss = 1.6272e-03, PNorm = 171.5172, GNorm = 0.2123, lr_0 = 1.6615e-04
Loss = 3.9753e-03, PNorm = 171.5197, GNorm = 0.1124, lr_0 = 1.6604e-04
Loss = 1.8321e-03, PNorm = 171.5213, GNorm = 0.1780, lr_0 = 1.6592e-04
Loss = 3.4946e-03, PNorm = 171.5257, GNorm = 0.1680, lr_0 = 1.6581e-04
Loss = 1.9444e-03, PNorm = 171.5291, GNorm = 0.1897, lr_0 = 1.6570e-04
Loss = 3.2314e-03, PNorm = 171.5324, GNorm = 0.1716, lr_0 = 1.6558e-04
Loss = 2.5664e-03, PNorm = 171.5336, GNorm = 0.1057, lr_0 = 1.6547e-04
Loss = 2.0237e-03, PNorm = 171.5368, GNorm = 0.1770, lr_0 = 1.6536e-04
Loss = 1.5800e-03, PNorm = 171.5389, GNorm = 0.1429, lr_0 = 1.6524e-04
Loss = 1.9321e-03, PNorm = 171.5404, GNorm = 0.1037, lr_0 = 1.6513e-04
Loss = 1.8234e-03, PNorm = 171.5415, GNorm = 0.0731, lr_0 = 1.6502e-04
Loss = 1.2597e-03, PNorm = 171.5446, GNorm = 0.0530, lr_0 = 1.6490e-04
Loss = 1.8396e-03, PNorm = 171.5494, GNorm = 0.0827, lr_0 = 1.6479e-04
Loss = 1.9072e-03, PNorm = 171.5537, GNorm = 0.0951, lr_0 = 1.6468e-04
Loss = 3.6333e-03, PNorm = 171.5579, GNorm = 0.0638, lr_0 = 1.6457e-04
Loss = 1.4426e-03, PNorm = 171.5618, GNorm = 0.0615, lr_0 = 1.6445e-04
Loss = 2.6338e-03, PNorm = 171.5642, GNorm = 0.2240, lr_0 = 1.6434e-04
Loss = 2.0409e-03, PNorm = 171.5667, GNorm = 0.1797, lr_0 = 1.6423e-04
Loss = 1.3854e-03, PNorm = 171.5694, GNorm = 0.1592, lr_0 = 1.6412e-04
Loss = 1.5624e-03, PNorm = 171.5731, GNorm = 0.0391, lr_0 = 1.6400e-04
Loss = 2.2002e-03, PNorm = 171.5764, GNorm = 0.0900, lr_0 = 1.6389e-04
Loss = 2.7775e-03, PNorm = 171.5789, GNorm = 0.0472, lr_0 = 1.6378e-04
Validation mae = 0.278288
Epoch 24
Loss = 2.0518e-03, PNorm = 171.5815, GNorm = 0.0907, lr_0 = 1.6367e-04
Loss = 1.2496e-03, PNorm = 171.5844, GNorm = 0.0492, lr_0 = 1.6355e-04
Loss = 1.3961e-03, PNorm = 171.5860, GNorm = 0.1461, lr_0 = 1.6344e-04
Loss = 1.1911e-03, PNorm = 171.5893, GNorm = 0.0298, lr_0 = 1.6333e-04
Loss = 1.5905e-03, PNorm = 171.5927, GNorm = 0.1081, lr_0 = 1.6322e-04
Loss = 1.3357e-03, PNorm = 171.5951, GNorm = 0.1608, lr_0 = 1.6311e-04
Loss = 1.2765e-03, PNorm = 171.5964, GNorm = 0.1686, lr_0 = 1.6299e-04
Loss = 1.9153e-03, PNorm = 171.5975, GNorm = 0.0799, lr_0 = 1.6288e-04
Loss = 1.1228e-03, PNorm = 171.6005, GNorm = 0.0971, lr_0 = 1.6277e-04
Loss = 1.1261e-03, PNorm = 171.6018, GNorm = 0.0751, lr_0 = 1.6266e-04
Loss = 1.8431e-03, PNorm = 171.6049, GNorm = 0.1972, lr_0 = 1.6255e-04
Loss = 2.0042e-03, PNorm = 171.6081, GNorm = 0.0608, lr_0 = 1.6244e-04
Loss = 1.5388e-03, PNorm = 171.6100, GNorm = 0.0451, lr_0 = 1.6233e-04
Loss = 1.2181e-03, PNorm = 171.6100, GNorm = 0.0826, lr_0 = 1.6221e-04
Loss = 1.7530e-03, PNorm = 171.6114, GNorm = 0.1709, lr_0 = 1.6210e-04
Loss = 1.2647e-03, PNorm = 171.6120, GNorm = 0.1378, lr_0 = 1.6199e-04
Loss = 1.4649e-03, PNorm = 171.6146, GNorm = 0.0829, lr_0 = 1.6188e-04
Loss = 2.4789e-03, PNorm = 171.6176, GNorm = 0.1100, lr_0 = 1.6177e-04
Loss = 1.5862e-03, PNorm = 171.6214, GNorm = 0.1098, lr_0 = 1.6166e-04
Loss = 1.7228e-03, PNorm = 171.6241, GNorm = 0.0848, lr_0 = 1.6155e-04
Loss = 1.5218e-03, PNorm = 171.6272, GNorm = 0.0554, lr_0 = 1.6144e-04
Loss = 1.1806e-03, PNorm = 171.6299, GNorm = 0.1436, lr_0 = 1.6133e-04
Loss = 1.7692e-03, PNorm = 171.6335, GNorm = 0.1042, lr_0 = 1.6122e-04
Loss = 1.6279e-03, PNorm = 171.6350, GNorm = 0.0572, lr_0 = 1.6111e-04
Loss = 1.2261e-03, PNorm = 171.6363, GNorm = 0.0696, lr_0 = 1.6100e-04
Loss = 1.1422e-03, PNorm = 171.6385, GNorm = 0.1156, lr_0 = 1.6089e-04
Loss = 1.3124e-03, PNorm = 171.6403, GNorm = 0.0739, lr_0 = 1.6078e-04
Loss = 1.3121e-03, PNorm = 171.6415, GNorm = 0.1684, lr_0 = 1.6067e-04
Loss = 1.2622e-03, PNorm = 171.6428, GNorm = 0.0536, lr_0 = 1.6056e-04
Loss = 1.5945e-03, PNorm = 171.6438, GNorm = 0.1773, lr_0 = 1.6045e-04
Loss = 1.7783e-03, PNorm = 171.6481, GNorm = 0.0748, lr_0 = 1.6034e-04
Loss = 1.5746e-03, PNorm = 171.6509, GNorm = 0.1752, lr_0 = 1.6023e-04
Loss = 2.2300e-03, PNorm = 171.6545, GNorm = 0.0685, lr_0 = 1.6012e-04
Loss = 1.6750e-03, PNorm = 171.6573, GNorm = 0.1419, lr_0 = 1.6001e-04
Loss = 1.1586e-03, PNorm = 171.6599, GNorm = 0.0703, lr_0 = 1.5990e-04
Loss = 1.2396e-03, PNorm = 171.6623, GNorm = 0.0534, lr_0 = 1.5979e-04
Loss = 2.0060e-03, PNorm = 171.6650, GNorm = 0.1471, lr_0 = 1.5968e-04
Loss = 2.2807e-03, PNorm = 171.6681, GNorm = 0.0435, lr_0 = 1.5957e-04
Loss = 1.3921e-03, PNorm = 171.6707, GNorm = 0.1694, lr_0 = 1.5946e-04
Loss = 1.6830e-03, PNorm = 171.6736, GNorm = 0.0862, lr_0 = 1.5935e-04
Loss = 2.2218e-03, PNorm = 171.6763, GNorm = 0.2118, lr_0 = 1.5924e-04
Loss = 1.4013e-03, PNorm = 171.6785, GNorm = 0.2800, lr_0 = 1.5913e-04
Loss = 1.4008e-03, PNorm = 171.6810, GNorm = 0.1824, lr_0 = 1.5902e-04
Loss = 1.1660e-03, PNorm = 171.6843, GNorm = 0.0822, lr_0 = 1.5891e-04
Loss = 1.5479e-03, PNorm = 171.6873, GNorm = 0.0848, lr_0 = 1.5880e-04
Loss = 3.1040e-03, PNorm = 171.6913, GNorm = 0.0275, lr_0 = 1.5870e-04
Loss = 1.1580e-03, PNorm = 171.6941, GNorm = 0.1459, lr_0 = 1.5859e-04
Loss = 1.8953e-03, PNorm = 171.6971, GNorm = 0.1120, lr_0 = 1.5848e-04
Loss = 3.8494e-03, PNorm = 171.7006, GNorm = 0.1847, lr_0 = 1.5837e-04
Loss = 1.6022e-03, PNorm = 171.7024, GNorm = 0.1186, lr_0 = 1.5826e-04
Loss = 1.3581e-03, PNorm = 171.7053, GNorm = 0.1087, lr_0 = 1.5815e-04
Loss = 1.1871e-03, PNorm = 171.7094, GNorm = 0.2050, lr_0 = 1.5804e-04
Loss = 2.1361e-03, PNorm = 171.7122, GNorm = 0.1832, lr_0 = 1.5794e-04
Loss = 2.4043e-03, PNorm = 171.7137, GNorm = 0.0421, lr_0 = 1.5783e-04
Loss = 3.0488e-03, PNorm = 171.7149, GNorm = 0.1829, lr_0 = 1.5772e-04
Loss = 2.8569e-03, PNorm = 171.7177, GNorm = 0.0955, lr_0 = 1.5761e-04
Loss = 1.6481e-03, PNorm = 171.7190, GNorm = 0.1145, lr_0 = 1.5750e-04
Loss = 1.3599e-03, PNorm = 171.7219, GNorm = 0.0963, lr_0 = 1.5740e-04
Loss = 1.2111e-03, PNorm = 171.7235, GNorm = 0.0849, lr_0 = 1.5729e-04
Loss = 1.1644e-03, PNorm = 171.7269, GNorm = 0.1812, lr_0 = 1.5718e-04
Loss = 1.2577e-03, PNorm = 171.7310, GNorm = 0.1884, lr_0 = 1.5707e-04
Loss = 2.4330e-03, PNorm = 171.7340, GNorm = 0.1937, lr_0 = 1.5697e-04
Loss = 1.6615e-03, PNorm = 171.7381, GNorm = 0.2359, lr_0 = 1.5686e-04
Loss = 2.0398e-03, PNorm = 171.7408, GNorm = 0.2049, lr_0 = 1.5675e-04
Loss = 1.3456e-03, PNorm = 171.7424, GNorm = 0.1546, lr_0 = 1.5664e-04
Loss = 1.0518e-03, PNorm = 171.7457, GNorm = 0.0425, lr_0 = 1.5654e-04
Loss = 1.0400e-03, PNorm = 171.7498, GNorm = 0.0856, lr_0 = 1.5643e-04
Loss = 1.2242e-03, PNorm = 171.7532, GNorm = 0.0594, lr_0 = 1.5632e-04
Loss = 1.8369e-03, PNorm = 171.7550, GNorm = 0.0520, lr_0 = 1.5621e-04
Loss = 1.1707e-03, PNorm = 171.7554, GNorm = 0.2155, lr_0 = 1.5611e-04
Loss = 3.3639e-03, PNorm = 171.7574, GNorm = 0.1559, lr_0 = 1.5600e-04
Loss = 1.1323e-03, PNorm = 171.7584, GNorm = 0.1213, lr_0 = 1.5589e-04
Loss = 4.6548e-03, PNorm = 171.7599, GNorm = 0.2734, lr_0 = 1.5579e-04
Loss = 1.3385e-03, PNorm = 171.7629, GNorm = 0.1875, lr_0 = 1.5568e-04
Loss = 1.5896e-03, PNorm = 171.7656, GNorm = 0.0326, lr_0 = 1.5557e-04
Loss = 1.1169e-03, PNorm = 171.7689, GNorm = 0.0852, lr_0 = 1.5547e-04
Loss = 1.4411e-03, PNorm = 171.7715, GNorm = 0.0692, lr_0 = 1.5536e-04
Loss = 3.8019e-03, PNorm = 171.7730, GNorm = 0.0725, lr_0 = 1.5525e-04
Loss = 2.2686e-03, PNorm = 171.7727, GNorm = 0.1554, lr_0 = 1.5515e-04
Loss = 2.2242e-03, PNorm = 171.7755, GNorm = 0.3686, lr_0 = 1.5504e-04
Loss = 1.0213e-03, PNorm = 171.7813, GNorm = 0.0858, lr_0 = 1.5493e-04
Loss = 2.8436e-03, PNorm = 171.7844, GNorm = 0.0850, lr_0 = 1.5483e-04
Loss = 1.7895e-03, PNorm = 171.7850, GNorm = 0.0869, lr_0 = 1.5472e-04
Loss = 1.2930e-03, PNorm = 171.7876, GNorm = 0.1678, lr_0 = 1.5462e-04
Loss = 2.7740e-03, PNorm = 171.7892, GNorm = 0.0873, lr_0 = 1.5451e-04
Loss = 3.7100e-03, PNorm = 171.7911, GNorm = 0.2157, lr_0 = 1.5440e-04
Loss = 3.0811e-03, PNorm = 171.7947, GNorm = 0.1313, lr_0 = 1.5430e-04
Loss = 1.2444e-03, PNorm = 171.7975, GNorm = 0.0761, lr_0 = 1.5419e-04
Loss = 1.5713e-03, PNorm = 171.8027, GNorm = 0.0928, lr_0 = 1.5409e-04
Loss = 1.9605e-03, PNorm = 171.8075, GNorm = 0.2341, lr_0 = 1.5398e-04
Loss = 2.4380e-03, PNorm = 171.8095, GNorm = 0.0784, lr_0 = 1.5388e-04
Loss = 1.7086e-03, PNorm = 171.8125, GNorm = 0.0882, lr_0 = 1.5377e-04
Loss = 2.1296e-03, PNorm = 171.8156, GNorm = 0.1759, lr_0 = 1.5367e-04
Loss = 2.4056e-03, PNorm = 171.8186, GNorm = 0.0852, lr_0 = 1.5356e-04
Loss = 2.1057e-03, PNorm = 171.8207, GNorm = 0.1547, lr_0 = 1.5346e-04
Loss = 2.6465e-03, PNorm = 171.8235, GNorm = 0.1502, lr_0 = 1.5335e-04
Loss = 2.7166e-03, PNorm = 171.8287, GNorm = 0.1395, lr_0 = 1.5325e-04
Loss = 2.1405e-03, PNorm = 171.8309, GNorm = 0.4997, lr_0 = 1.5314e-04
Loss = 1.3042e-03, PNorm = 171.8343, GNorm = 0.1559, lr_0 = 1.5304e-04
Loss = 1.5378e-03, PNorm = 171.8380, GNorm = 0.0548, lr_0 = 1.5293e-04
Loss = 3.0598e-03, PNorm = 171.8441, GNorm = 0.0907, lr_0 = 1.5283e-04
Loss = 1.0395e-03, PNorm = 171.8497, GNorm = 0.0481, lr_0 = 1.5272e-04
Loss = 2.6775e-03, PNorm = 171.8549, GNorm = 0.1988, lr_0 = 1.5262e-04
Loss = 1.8278e-03, PNorm = 171.8556, GNorm = 0.0442, lr_0 = 1.5251e-04
Loss = 6.5806e-03, PNorm = 171.8566, GNorm = 0.1037, lr_0 = 1.5241e-04
Loss = 2.5216e-03, PNorm = 171.8577, GNorm = 0.0721, lr_0 = 1.5230e-04
Loss = 1.3032e-03, PNorm = 171.8609, GNorm = 0.0805, lr_0 = 1.5220e-04
Loss = 1.8904e-03, PNorm = 171.8629, GNorm = 0.1864, lr_0 = 1.5209e-04
Loss = 2.5061e-03, PNorm = 171.8636, GNorm = 0.0260, lr_0 = 1.5199e-04
Loss = 1.4459e-03, PNorm = 171.8663, GNorm = 0.1112, lr_0 = 1.5189e-04
Loss = 1.6320e-03, PNorm = 171.8666, GNorm = 0.0383, lr_0 = 1.5178e-04
Loss = 1.9543e-03, PNorm = 171.8692, GNorm = 0.1349, lr_0 = 1.5168e-04
Loss = 1.4554e-03, PNorm = 171.8715, GNorm = 0.0696, lr_0 = 1.5157e-04
Loss = 1.9931e-03, PNorm = 171.8735, GNorm = 0.0467, lr_0 = 1.5147e-04
Loss = 1.8616e-03, PNorm = 171.8751, GNorm = 0.1883, lr_0 = 1.5137e-04
Loss = 2.4536e-03, PNorm = 171.8760, GNorm = 0.1128, lr_0 = 1.5126e-04
Loss = 3.4730e-03, PNorm = 171.8791, GNorm = 0.1269, lr_0 = 1.5116e-04
Loss = 1.6028e-03, PNorm = 171.8840, GNorm = 0.1348, lr_0 = 1.5106e-04
Loss = 3.3455e-03, PNorm = 171.8881, GNorm = 0.0707, lr_0 = 1.5095e-04
Loss = 1.9624e-03, PNorm = 171.8915, GNorm = 0.1955, lr_0 = 1.5085e-04
Validation mae = 0.277946
Epoch 25
Loss = 1.3305e-03, PNorm = 171.8933, GNorm = 0.0754, lr_0 = 1.5075e-04
Loss = 1.1243e-03, PNorm = 171.8956, GNorm = 0.1348, lr_0 = 1.5064e-04
Loss = 1.1665e-03, PNorm = 171.8970, GNorm = 0.0737, lr_0 = 1.5054e-04
Loss = 1.5507e-03, PNorm = 171.8984, GNorm = 0.1842, lr_0 = 1.5044e-04
Loss = 1.0759e-03, PNorm = 171.9002, GNorm = 0.0858, lr_0 = 1.5033e-04
Loss = 1.2046e-03, PNorm = 171.9027, GNorm = 0.0516, lr_0 = 1.5023e-04
Loss = 1.5468e-03, PNorm = 171.9057, GNorm = 0.0308, lr_0 = 1.5013e-04
Loss = 2.5101e-03, PNorm = 171.9081, GNorm = 0.1653, lr_0 = 1.5002e-04
Loss = 1.1339e-03, PNorm = 171.9103, GNorm = 0.0382, lr_0 = 1.4992e-04
Loss = 1.1144e-03, PNorm = 171.9132, GNorm = 0.0688, lr_0 = 1.4982e-04
Loss = 2.5651e-03, PNorm = 171.9168, GNorm = 0.1015, lr_0 = 1.4972e-04
Loss = 1.3171e-03, PNorm = 171.9190, GNorm = 0.1405, lr_0 = 1.4961e-04
Loss = 1.6243e-03, PNorm = 171.9210, GNorm = 0.0756, lr_0 = 1.4951e-04
Loss = 1.5076e-03, PNorm = 171.9230, GNorm = 0.0819, lr_0 = 1.4941e-04
Loss = 1.6064e-03, PNorm = 171.9244, GNorm = 0.1030, lr_0 = 1.4931e-04
Loss = 9.5326e-04, PNorm = 171.9257, GNorm = 0.0871, lr_0 = 1.4920e-04
Loss = 1.5171e-03, PNorm = 171.9260, GNorm = 0.1074, lr_0 = 1.4910e-04
Loss = 2.3296e-03, PNorm = 171.9288, GNorm = 0.1186, lr_0 = 1.4900e-04
Loss = 2.6431e-03, PNorm = 171.9320, GNorm = 0.0862, lr_0 = 1.4890e-04
Loss = 1.4775e-03, PNorm = 171.9341, GNorm = 0.0374, lr_0 = 1.4880e-04
Loss = 1.0618e-03, PNorm = 171.9361, GNorm = 0.0572, lr_0 = 1.4869e-04
Loss = 1.5921e-03, PNorm = 171.9376, GNorm = 0.1422, lr_0 = 1.4859e-04
Loss = 1.8648e-03, PNorm = 171.9398, GNorm = 0.1385, lr_0 = 1.4849e-04
Loss = 2.4046e-03, PNorm = 171.9421, GNorm = 0.1212, lr_0 = 1.4839e-04
Loss = 1.8297e-03, PNorm = 171.9441, GNorm = 0.0351, lr_0 = 1.4829e-04
Loss = 1.0503e-03, PNorm = 171.9462, GNorm = 0.1254, lr_0 = 1.4818e-04
Loss = 1.1501e-03, PNorm = 171.9472, GNorm = 0.0790, lr_0 = 1.4808e-04
Loss = 1.7880e-03, PNorm = 171.9501, GNorm = 0.0645, lr_0 = 1.4798e-04
Loss = 1.0473e-03, PNorm = 171.9528, GNorm = 0.0852, lr_0 = 1.4788e-04
Loss = 3.9964e-03, PNorm = 171.9555, GNorm = 0.3058, lr_0 = 1.4778e-04
Loss = 1.7063e-03, PNorm = 171.9573, GNorm = 0.0871, lr_0 = 1.4768e-04
Loss = 1.7836e-03, PNorm = 171.9594, GNorm = 0.1203, lr_0 = 1.4758e-04
Loss = 1.0391e-03, PNorm = 171.9615, GNorm = 0.0920, lr_0 = 1.4748e-04
Loss = 1.6154e-03, PNorm = 171.9627, GNorm = 0.0469, lr_0 = 1.4737e-04
Loss = 2.0519e-03, PNorm = 171.9650, GNorm = 0.0557, lr_0 = 1.4727e-04
Loss = 2.1005e-03, PNorm = 171.9684, GNorm = 0.1009, lr_0 = 1.4717e-04
Loss = 1.0552e-03, PNorm = 171.9705, GNorm = 0.1624, lr_0 = 1.4707e-04
Loss = 1.2392e-03, PNorm = 171.9722, GNorm = 0.0983, lr_0 = 1.4697e-04
Loss = 8.1806e-04, PNorm = 171.9736, GNorm = 0.1300, lr_0 = 1.4687e-04
Loss = 1.9304e-03, PNorm = 171.9745, GNorm = 0.1694, lr_0 = 1.4677e-04
Loss = 2.2269e-03, PNorm = 171.9753, GNorm = 0.1277, lr_0 = 1.4667e-04
Loss = 1.8701e-03, PNorm = 171.9783, GNorm = 0.1292, lr_0 = 1.4657e-04
Loss = 2.1085e-03, PNorm = 171.9805, GNorm = 0.1398, lr_0 = 1.4647e-04
Loss = 1.6645e-03, PNorm = 171.9835, GNorm = 0.1234, lr_0 = 1.4637e-04
Loss = 1.7126e-03, PNorm = 171.9856, GNorm = 0.1819, lr_0 = 1.4627e-04
Loss = 1.4138e-03, PNorm = 171.9880, GNorm = 0.0966, lr_0 = 1.4617e-04
Loss = 1.8669e-03, PNorm = 171.9892, GNorm = 0.2150, lr_0 = 1.4607e-04
Loss = 8.4538e-04, PNorm = 171.9925, GNorm = 0.0843, lr_0 = 1.4597e-04
Loss = 8.8192e-04, PNorm = 171.9937, GNorm = 0.0337, lr_0 = 1.4587e-04
Loss = 1.8971e-03, PNorm = 171.9950, GNorm = 0.2078, lr_0 = 1.4577e-04
Loss = 9.5354e-04, PNorm = 171.9965, GNorm = 0.1019, lr_0 = 1.4567e-04
Loss = 1.0564e-03, PNorm = 171.9983, GNorm = 0.0498, lr_0 = 1.4557e-04
Loss = 1.1615e-03, PNorm = 171.9997, GNorm = 0.1224, lr_0 = 1.4547e-04
Loss = 1.3666e-03, PNorm = 172.0016, GNorm = 0.0546, lr_0 = 1.4537e-04
Loss = 1.3302e-03, PNorm = 172.0042, GNorm = 0.0752, lr_0 = 1.4527e-04
Loss = 1.9719e-03, PNorm = 172.0071, GNorm = 0.0528, lr_0 = 1.4517e-04
Loss = 1.8866e-03, PNorm = 172.0109, GNorm = 0.0374, lr_0 = 1.4507e-04
Loss = 9.6371e-04, PNorm = 172.0120, GNorm = 0.0761, lr_0 = 1.4497e-04
Loss = 2.3897e-03, PNorm = 172.0132, GNorm = 0.0706, lr_0 = 1.4487e-04
Loss = 1.4719e-03, PNorm = 172.0141, GNorm = 0.0384, lr_0 = 1.4477e-04
Loss = 1.0482e-03, PNorm = 172.0173, GNorm = 0.1080, lr_0 = 1.4467e-04
Loss = 1.0898e-03, PNorm = 172.0197, GNorm = 0.0890, lr_0 = 1.4457e-04
Loss = 1.5486e-03, PNorm = 172.0211, GNorm = 0.0496, lr_0 = 1.4447e-04
Loss = 1.8336e-03, PNorm = 172.0225, GNorm = 0.0586, lr_0 = 1.4438e-04
Loss = 9.4742e-04, PNorm = 172.0237, GNorm = 0.1653, lr_0 = 1.4428e-04
Loss = 1.2895e-03, PNorm = 172.0254, GNorm = 0.0501, lr_0 = 1.4418e-04
Loss = 1.0882e-03, PNorm = 172.0288, GNorm = 0.0320, lr_0 = 1.4408e-04
Loss = 1.7395e-03, PNorm = 172.0308, GNorm = 0.1262, lr_0 = 1.4398e-04
Loss = 1.8873e-03, PNorm = 172.0316, GNorm = 0.1654, lr_0 = 1.4388e-04
Loss = 1.2363e-03, PNorm = 172.0340, GNorm = 0.0945, lr_0 = 1.4378e-04
Loss = 3.3460e-03, PNorm = 172.0364, GNorm = 0.1343, lr_0 = 1.4368e-04
Loss = 1.2179e-03, PNorm = 172.0388, GNorm = 0.0904, lr_0 = 1.4359e-04
Loss = 1.1064e-03, PNorm = 172.0410, GNorm = 0.0665, lr_0 = 1.4349e-04
Loss = 8.7035e-04, PNorm = 172.0428, GNorm = 0.1320, lr_0 = 1.4339e-04
Loss = 1.2263e-03, PNorm = 172.0433, GNorm = 0.0695, lr_0 = 1.4329e-04
Loss = 1.3494e-03, PNorm = 172.0448, GNorm = 0.0634, lr_0 = 1.4319e-04
Loss = 1.9212e-03, PNorm = 172.0470, GNorm = 0.0408, lr_0 = 1.4310e-04
Loss = 1.8664e-03, PNorm = 172.0486, GNorm = 0.1761, lr_0 = 1.4300e-04
Loss = 1.5979e-03, PNorm = 172.0521, GNorm = 0.1343, lr_0 = 1.4290e-04
Loss = 1.0112e-03, PNorm = 172.0542, GNorm = 0.0300, lr_0 = 1.4280e-04
Loss = 4.1234e-03, PNorm = 172.0562, GNorm = 0.1204, lr_0 = 1.4270e-04
Loss = 2.1073e-03, PNorm = 172.0585, GNorm = 0.0699, lr_0 = 1.4261e-04
Loss = 1.1710e-03, PNorm = 172.0615, GNorm = 0.0444, lr_0 = 1.4251e-04
Loss = 1.0046e-03, PNorm = 172.0631, GNorm = 0.0328, lr_0 = 1.4241e-04
Loss = 2.1663e-03, PNorm = 172.0644, GNorm = 0.0915, lr_0 = 1.4231e-04
Loss = 1.3487e-03, PNorm = 172.0652, GNorm = 0.0767, lr_0 = 1.4222e-04
Loss = 1.8555e-03, PNorm = 172.0668, GNorm = 0.1006, lr_0 = 1.4212e-04
Loss = 3.9097e-03, PNorm = 172.0700, GNorm = 0.0529, lr_0 = 1.4202e-04
Loss = 1.0730e-03, PNorm = 172.0721, GNorm = 0.0910, lr_0 = 1.4192e-04
Loss = 1.1482e-03, PNorm = 172.0737, GNorm = 0.0429, lr_0 = 1.4183e-04
Loss = 2.5466e-03, PNorm = 172.0766, GNorm = 0.1800, lr_0 = 1.4173e-04
Loss = 1.5319e-03, PNorm = 172.0788, GNorm = 0.1138, lr_0 = 1.4163e-04
Loss = 1.8815e-03, PNorm = 172.0806, GNorm = 0.0951, lr_0 = 1.4153e-04
Loss = 2.9694e-03, PNorm = 172.0802, GNorm = 0.1175, lr_0 = 1.4144e-04
Loss = 2.2760e-03, PNorm = 172.0827, GNorm = 0.1289, lr_0 = 1.4134e-04
Loss = 1.4672e-03, PNorm = 172.0859, GNorm = 0.0474, lr_0 = 1.4124e-04
Loss = 1.5558e-03, PNorm = 172.0892, GNorm = 0.0703, lr_0 = 1.4115e-04
Loss = 9.0988e-04, PNorm = 172.0934, GNorm = 0.1009, lr_0 = 1.4105e-04
Loss = 9.7158e-04, PNorm = 172.0962, GNorm = 0.2567, lr_0 = 1.4095e-04
Loss = 1.3459e-03, PNorm = 172.0958, GNorm = 0.0585, lr_0 = 1.4086e-04
Loss = 1.3692e-03, PNorm = 172.0972, GNorm = 0.1054, lr_0 = 1.4076e-04
Loss = 1.3629e-03, PNorm = 172.0995, GNorm = 0.1168, lr_0 = 1.4066e-04
Loss = 2.2163e-03, PNorm = 172.1024, GNorm = 0.0840, lr_0 = 1.4057e-04
Loss = 1.9690e-03, PNorm = 172.1036, GNorm = 0.0667, lr_0 = 1.4047e-04
Loss = 1.6387e-03, PNorm = 172.1065, GNorm = 0.0912, lr_0 = 1.4038e-04
Loss = 1.6114e-03, PNorm = 172.1091, GNorm = 0.0835, lr_0 = 1.4028e-04
Loss = 2.1937e-03, PNorm = 172.1132, GNorm = 0.1238, lr_0 = 1.4018e-04
Loss = 1.2945e-03, PNorm = 172.1147, GNorm = 0.3198, lr_0 = 1.4009e-04
Loss = 3.3306e-03, PNorm = 172.1178, GNorm = 0.2517, lr_0 = 1.3999e-04
Loss = 2.2326e-03, PNorm = 172.1198, GNorm = 0.0502, lr_0 = 1.3990e-04
Loss = 1.7766e-03, PNorm = 172.1232, GNorm = 0.1845, lr_0 = 1.3980e-04
Loss = 2.3951e-03, PNorm = 172.1252, GNorm = 0.1439, lr_0 = 1.3970e-04
Loss = 3.1731e-03, PNorm = 172.1262, GNorm = 0.0854, lr_0 = 1.3961e-04
Loss = 2.1168e-03, PNorm = 172.1281, GNorm = 0.0770, lr_0 = 1.3951e-04
Loss = 9.9449e-04, PNorm = 172.1285, GNorm = 0.0702, lr_0 = 1.3942e-04
Loss = 3.3117e-03, PNorm = 172.1301, GNorm = 0.1101, lr_0 = 1.3932e-04
Loss = 1.0129e-03, PNorm = 172.1340, GNorm = 0.0547, lr_0 = 1.3923e-04
Loss = 1.0890e-03, PNorm = 172.1365, GNorm = 0.0564, lr_0 = 1.3913e-04
Loss = 2.7490e-03, PNorm = 172.1402, GNorm = 0.0673, lr_0 = 1.3904e-04
Loss = 3.1021e-03, PNorm = 172.1423, GNorm = 0.0906, lr_0 = 1.3894e-04
Validation mae = 0.277955
Epoch 26
Loss = 9.6763e-04, PNorm = 172.1445, GNorm = 0.1314, lr_0 = 1.3884e-04
Loss = 9.1224e-04, PNorm = 172.1474, GNorm = 0.0851, lr_0 = 1.3875e-04
Loss = 3.1105e-03, PNorm = 172.1486, GNorm = 0.1061, lr_0 = 1.3865e-04
Loss = 1.5287e-03, PNorm = 172.1507, GNorm = 0.2049, lr_0 = 1.3856e-04
Loss = 2.0163e-03, PNorm = 172.1518, GNorm = 0.4014, lr_0 = 1.3846e-04
Loss = 1.7188e-03, PNorm = 172.1547, GNorm = 0.0813, lr_0 = 1.3837e-04
Loss = 1.2089e-03, PNorm = 172.1558, GNorm = 0.0488, lr_0 = 1.3828e-04
Loss = 1.8342e-03, PNorm = 172.1564, GNorm = 0.0791, lr_0 = 1.3818e-04
Loss = 1.5788e-03, PNorm = 172.1564, GNorm = 0.1063, lr_0 = 1.3809e-04
Loss = 1.8161e-03, PNorm = 172.1587, GNorm = 0.1779, lr_0 = 1.3799e-04
Loss = 1.2219e-03, PNorm = 172.1611, GNorm = 0.2282, lr_0 = 1.3790e-04
Loss = 2.4819e-03, PNorm = 172.1638, GNorm = 0.1222, lr_0 = 1.3780e-04
Loss = 1.4605e-03, PNorm = 172.1683, GNorm = 0.1343, lr_0 = 1.3771e-04
Loss = 8.3960e-04, PNorm = 172.1703, GNorm = 0.0961, lr_0 = 1.3761e-04
Loss = 1.2987e-03, PNorm = 172.1723, GNorm = 0.0747, lr_0 = 1.3752e-04
Loss = 1.3264e-03, PNorm = 172.1733, GNorm = 0.0456, lr_0 = 1.3742e-04
Loss = 8.3319e-04, PNorm = 172.1754, GNorm = 0.0734, lr_0 = 1.3733e-04
Loss = 1.8048e-03, PNorm = 172.1771, GNorm = 0.1716, lr_0 = 1.3724e-04
Loss = 1.9056e-03, PNorm = 172.1793, GNorm = 0.2944, lr_0 = 1.3714e-04
Loss = 8.4247e-04, PNorm = 172.1815, GNorm = 0.0742, lr_0 = 1.3705e-04
Loss = 1.1472e-03, PNorm = 172.1828, GNorm = 0.0541, lr_0 = 1.3695e-04
Loss = 1.2533e-03, PNorm = 172.1852, GNorm = 0.0541, lr_0 = 1.3686e-04
Loss = 7.5510e-04, PNorm = 172.1869, GNorm = 0.1278, lr_0 = 1.3677e-04
Loss = 9.3377e-04, PNorm = 172.1886, GNorm = 0.2243, lr_0 = 1.3667e-04
Loss = 1.3183e-03, PNorm = 172.1911, GNorm = 0.0414, lr_0 = 1.3658e-04
Loss = 1.2035e-03, PNorm = 172.1934, GNorm = 0.0733, lr_0 = 1.3649e-04
Loss = 1.3253e-03, PNorm = 172.1961, GNorm = 0.0736, lr_0 = 1.3639e-04
Loss = 1.2798e-03, PNorm = 172.1975, GNorm = 0.1772, lr_0 = 1.3630e-04
Loss = 1.0253e-03, PNorm = 172.1990, GNorm = 0.1329, lr_0 = 1.3621e-04
Loss = 9.1914e-04, PNorm = 172.1985, GNorm = 0.0371, lr_0 = 1.3611e-04
Loss = 1.5249e-03, PNorm = 172.2004, GNorm = 0.1053, lr_0 = 1.3602e-04
Loss = 9.7876e-04, PNorm = 172.2020, GNorm = 0.0478, lr_0 = 1.3593e-04
Loss = 1.1253e-03, PNorm = 172.2028, GNorm = 0.1327, lr_0 = 1.3583e-04
Loss = 7.9354e-04, PNorm = 172.2028, GNorm = 0.0730, lr_0 = 1.3574e-04
Loss = 1.0997e-03, PNorm = 172.2041, GNorm = 0.0734, lr_0 = 1.3565e-04
Loss = 1.8509e-03, PNorm = 172.2060, GNorm = 0.1382, lr_0 = 1.3555e-04
Loss = 1.0933e-03, PNorm = 172.2080, GNorm = 0.1042, lr_0 = 1.3546e-04
Loss = 1.6748e-03, PNorm = 172.2094, GNorm = 0.1137, lr_0 = 1.3537e-04
Loss = 2.8616e-03, PNorm = 172.2120, GNorm = 0.0557, lr_0 = 1.3528e-04
Loss = 9.0745e-04, PNorm = 172.2145, GNorm = 0.0536, lr_0 = 1.3518e-04
Loss = 2.6765e-03, PNorm = 172.2158, GNorm = 0.0698, lr_0 = 1.3509e-04
Loss = 2.8223e-03, PNorm = 172.2178, GNorm = 0.2133, lr_0 = 1.3500e-04
Loss = 9.4729e-04, PNorm = 172.2195, GNorm = 0.0753, lr_0 = 1.3491e-04
Loss = 8.0851e-04, PNorm = 172.2209, GNorm = 0.2060, lr_0 = 1.3481e-04
Loss = 8.7680e-04, PNorm = 172.2226, GNorm = 0.0793, lr_0 = 1.3472e-04
Loss = 1.3130e-03, PNorm = 172.2249, GNorm = 0.1756, lr_0 = 1.3463e-04
Loss = 1.2025e-03, PNorm = 172.2257, GNorm = 0.0438, lr_0 = 1.3454e-04
Loss = 2.5302e-03, PNorm = 172.2269, GNorm = 0.0784, lr_0 = 1.3444e-04
Loss = 2.6236e-03, PNorm = 172.2278, GNorm = 0.1651, lr_0 = 1.3435e-04
Loss = 8.6421e-04, PNorm = 172.2304, GNorm = 0.1775, lr_0 = 1.3426e-04
Loss = 1.2183e-03, PNorm = 172.2338, GNorm = 0.1041, lr_0 = 1.3417e-04
Loss = 9.0963e-04, PNorm = 172.2369, GNorm = 0.0737, lr_0 = 1.3408e-04
Loss = 1.6579e-03, PNorm = 172.2393, GNorm = 0.3272, lr_0 = 1.3398e-04
Loss = 8.8521e-04, PNorm = 172.2414, GNorm = 0.0950, lr_0 = 1.3389e-04
Loss = 2.6882e-03, PNorm = 172.2443, GNorm = 0.0521, lr_0 = 1.3380e-04
Loss = 1.6936e-03, PNorm = 172.2456, GNorm = 0.0339, lr_0 = 1.3371e-04
Loss = 9.6055e-04, PNorm = 172.2482, GNorm = 0.0483, lr_0 = 1.3362e-04
Loss = 1.4094e-03, PNorm = 172.2511, GNorm = 0.1518, lr_0 = 1.3353e-04
Loss = 1.0707e-03, PNorm = 172.2531, GNorm = 0.0942, lr_0 = 1.3343e-04
Loss = 1.5825e-03, PNorm = 172.2544, GNorm = 0.0937, lr_0 = 1.3334e-04
Loss = 1.3060e-03, PNorm = 172.2541, GNorm = 0.1863, lr_0 = 1.3325e-04
Loss = 2.5035e-03, PNorm = 172.2570, GNorm = 0.1996, lr_0 = 1.3316e-04
Loss = 1.5834e-03, PNorm = 172.2581, GNorm = 0.0777, lr_0 = 1.3307e-04
Loss = 2.1440e-03, PNorm = 172.2607, GNorm = 0.0577, lr_0 = 1.3298e-04
Loss = 9.5197e-04, PNorm = 172.2625, GNorm = 0.1234, lr_0 = 1.3289e-04
Loss = 1.9398e-03, PNorm = 172.2663, GNorm = 0.1684, lr_0 = 1.3280e-04
Loss = 8.0321e-04, PNorm = 172.2678, GNorm = 0.0370, lr_0 = 1.3270e-04
Loss = 8.4145e-04, PNorm = 172.2698, GNorm = 0.1232, lr_0 = 1.3261e-04
Loss = 3.2611e-03, PNorm = 172.2712, GNorm = 0.0320, lr_0 = 1.3252e-04
Loss = 1.7362e-03, PNorm = 172.2736, GNorm = 0.0363, lr_0 = 1.3243e-04
Loss = 1.9122e-03, PNorm = 172.2750, GNorm = 0.1441, lr_0 = 1.3234e-04
Loss = 2.0549e-03, PNorm = 172.2764, GNorm = 0.0836, lr_0 = 1.3225e-04
Loss = 1.5791e-03, PNorm = 172.2781, GNorm = 0.0555, lr_0 = 1.3216e-04
Loss = 1.5421e-03, PNorm = 172.2802, GNorm = 0.0443, lr_0 = 1.3207e-04
Loss = 2.6021e-03, PNorm = 172.2820, GNorm = 0.0577, lr_0 = 1.3198e-04
Loss = 3.0548e-03, PNorm = 172.2847, GNorm = 0.2046, lr_0 = 1.3189e-04
Loss = 1.2406e-03, PNorm = 172.2856, GNorm = 0.1208, lr_0 = 1.3180e-04
Loss = 8.3356e-04, PNorm = 172.2873, GNorm = 0.2260, lr_0 = 1.3171e-04
Loss = 1.0380e-03, PNorm = 172.2886, GNorm = 0.1658, lr_0 = 1.3162e-04
Loss = 6.6634e-04, PNorm = 172.2901, GNorm = 0.0516, lr_0 = 1.3153e-04
Loss = 8.3057e-04, PNorm = 172.2920, GNorm = 0.0556, lr_0 = 1.3144e-04
Loss = 2.2772e-03, PNorm = 172.2948, GNorm = 0.1862, lr_0 = 1.3135e-04
Loss = 1.6535e-03, PNorm = 172.2978, GNorm = 0.1731, lr_0 = 1.3126e-04
Loss = 1.4690e-03, PNorm = 172.2986, GNorm = 0.0906, lr_0 = 1.3117e-04
Loss = 1.5517e-03, PNorm = 172.2992, GNorm = 0.0800, lr_0 = 1.3108e-04
Loss = 1.0665e-03, PNorm = 172.3003, GNorm = 0.0531, lr_0 = 1.3099e-04
Loss = 2.0200e-03, PNorm = 172.3019, GNorm = 0.0811, lr_0 = 1.3090e-04
Loss = 1.1658e-03, PNorm = 172.3042, GNorm = 0.0273, lr_0 = 1.3081e-04
Loss = 1.6684e-03, PNorm = 172.3056, GNorm = 0.0567, lr_0 = 1.3072e-04
Loss = 8.3458e-04, PNorm = 172.3070, GNorm = 0.0718, lr_0 = 1.3063e-04
Loss = 1.3863e-03, PNorm = 172.3079, GNorm = 0.1955, lr_0 = 1.3054e-04
Loss = 1.0319e-03, PNorm = 172.3094, GNorm = 0.0856, lr_0 = 1.3045e-04
Loss = 8.8787e-04, PNorm = 172.3120, GNorm = 0.1039, lr_0 = 1.3036e-04
Loss = 1.2754e-03, PNorm = 172.3164, GNorm = 0.0392, lr_0 = 1.3027e-04
Loss = 1.8468e-03, PNorm = 172.3184, GNorm = 0.0780, lr_0 = 1.3018e-04
Loss = 8.5461e-04, PNorm = 172.3195, GNorm = 0.0294, lr_0 = 1.3009e-04
Loss = 1.6309e-03, PNorm = 172.3211, GNorm = 0.1455, lr_0 = 1.3000e-04
Loss = 9.1490e-04, PNorm = 172.3229, GNorm = 0.0902, lr_0 = 1.2992e-04
Loss = 4.1492e-03, PNorm = 172.3246, GNorm = 0.3504, lr_0 = 1.2983e-04
Loss = 8.9293e-04, PNorm = 172.3282, GNorm = 0.0701, lr_0 = 1.2974e-04
Loss = 1.3987e-03, PNorm = 172.3298, GNorm = 0.1077, lr_0 = 1.2965e-04
Loss = 1.1789e-03, PNorm = 172.3321, GNorm = 0.1443, lr_0 = 1.2956e-04
Loss = 2.0434e-03, PNorm = 172.3325, GNorm = 0.1591, lr_0 = 1.2947e-04
Loss = 1.9353e-03, PNorm = 172.3322, GNorm = 0.1246, lr_0 = 1.2938e-04
Loss = 4.4809e-03, PNorm = 172.3337, GNorm = 0.0996, lr_0 = 1.2929e-04
Loss = 1.2069e-03, PNorm = 172.3374, GNorm = 0.0570, lr_0 = 1.2921e-04
Loss = 3.0028e-03, PNorm = 172.3398, GNorm = 0.1078, lr_0 = 1.2912e-04
Loss = 8.9286e-04, PNorm = 172.3435, GNorm = 0.1478, lr_0 = 1.2903e-04
Loss = 8.0497e-04, PNorm = 172.3461, GNorm = 0.2239, lr_0 = 1.2894e-04
Loss = 1.7946e-03, PNorm = 172.3481, GNorm = 0.1108, lr_0 = 1.2885e-04
Loss = 9.0605e-04, PNorm = 172.3508, GNorm = 0.1023, lr_0 = 1.2876e-04
Loss = 2.2177e-03, PNorm = 172.3528, GNorm = 0.0578, lr_0 = 1.2867e-04
Loss = 2.0657e-03, PNorm = 172.3545, GNorm = 0.1649, lr_0 = 1.2859e-04
Loss = 9.3823e-04, PNorm = 172.3559, GNorm = 0.0353, lr_0 = 1.2850e-04
Loss = 8.6827e-04, PNorm = 172.3588, GNorm = 0.1205, lr_0 = 1.2841e-04
Loss = 1.3361e-03, PNorm = 172.3616, GNorm = 0.0640, lr_0 = 1.2832e-04
Loss = 9.8501e-04, PNorm = 172.3640, GNorm = 0.0810, lr_0 = 1.2823e-04
Loss = 3.8708e-03, PNorm = 172.3658, GNorm = 0.1674, lr_0 = 1.2815e-04
Loss = 2.5499e-03, PNorm = 172.3680, GNorm = 0.1524, lr_0 = 1.2806e-04
Loss = 1.0377e-03, PNorm = 172.3720, GNorm = 0.0212, lr_0 = 1.2797e-04
Validation mae = 0.277742
Epoch 27
Loss = 1.8014e-03, PNorm = 172.3738, GNorm = 0.1294, lr_0 = 1.2788e-04
Loss = 1.6688e-03, PNorm = 172.3732, GNorm = 0.0425, lr_0 = 1.2780e-04
Loss = 1.3123e-03, PNorm = 172.3737, GNorm = 0.0666, lr_0 = 1.2771e-04
Loss = 8.0558e-04, PNorm = 172.3754, GNorm = 0.0707, lr_0 = 1.2762e-04
Loss = 8.1891e-04, PNorm = 172.3775, GNorm = 0.0767, lr_0 = 1.2753e-04
Loss = 1.7940e-03, PNorm = 172.3788, GNorm = 0.0503, lr_0 = 1.2745e-04
Loss = 1.7954e-03, PNorm = 172.3810, GNorm = 0.2238, lr_0 = 1.2736e-04
Loss = 1.0336e-03, PNorm = 172.3834, GNorm = 0.0715, lr_0 = 1.2727e-04
Loss = 8.2857e-04, PNorm = 172.3859, GNorm = 0.0981, lr_0 = 1.2718e-04
Loss = 1.3999e-03, PNorm = 172.3861, GNorm = 0.1435, lr_0 = 1.2710e-04
Loss = 1.5761e-03, PNorm = 172.3880, GNorm = 0.0693, lr_0 = 1.2701e-04
Loss = 8.0384e-04, PNorm = 172.3882, GNorm = 0.0978, lr_0 = 1.2692e-04
Loss = 7.5773e-04, PNorm = 172.3900, GNorm = 0.0479, lr_0 = 1.2684e-04
Loss = 1.3109e-03, PNorm = 172.3930, GNorm = 0.1782, lr_0 = 1.2675e-04
Loss = 2.6412e-03, PNorm = 172.3963, GNorm = 0.0356, lr_0 = 1.2666e-04
Loss = 1.5503e-03, PNorm = 172.3988, GNorm = 0.1112, lr_0 = 1.2658e-04
Loss = 1.5577e-03, PNorm = 172.4011, GNorm = 0.0459, lr_0 = 1.2649e-04
Loss = 9.4739e-04, PNorm = 172.4020, GNorm = 0.0758, lr_0 = 1.2640e-04
Loss = 9.4167e-04, PNorm = 172.4033, GNorm = 0.0983, lr_0 = 1.2632e-04
Loss = 1.2209e-03, PNorm = 172.4043, GNorm = 0.1647, lr_0 = 1.2623e-04
Loss = 8.2088e-04, PNorm = 172.4057, GNorm = 0.1909, lr_0 = 1.2614e-04
Loss = 2.0209e-03, PNorm = 172.4067, GNorm = 0.0803, lr_0 = 1.2606e-04
Loss = 1.1171e-03, PNorm = 172.4080, GNorm = 0.1150, lr_0 = 1.2597e-04
Loss = 8.5336e-04, PNorm = 172.4094, GNorm = 0.0268, lr_0 = 1.2588e-04
Loss = 1.9948e-03, PNorm = 172.4091, GNorm = 0.0221, lr_0 = 1.2580e-04
Loss = 1.4180e-03, PNorm = 172.4100, GNorm = 0.2952, lr_0 = 1.2571e-04
Loss = 1.7658e-03, PNorm = 172.4114, GNorm = 0.1116, lr_0 = 1.2563e-04
Loss = 6.7041e-04, PNorm = 172.4129, GNorm = 0.1831, lr_0 = 1.2554e-04
Loss = 1.0770e-03, PNorm = 172.4139, GNorm = 0.2252, lr_0 = 1.2545e-04
Loss = 1.2762e-03, PNorm = 172.4143, GNorm = 0.1236, lr_0 = 1.2537e-04
Loss = 1.9720e-03, PNorm = 172.4152, GNorm = 0.0889, lr_0 = 1.2528e-04
Loss = 3.0303e-03, PNorm = 172.4176, GNorm = 0.2726, lr_0 = 1.2520e-04
Loss = 1.4689e-03, PNorm = 172.4204, GNorm = 0.0747, lr_0 = 1.2511e-04
Loss = 3.2814e-03, PNorm = 172.4233, GNorm = 0.2260, lr_0 = 1.2502e-04
Loss = 7.4577e-04, PNorm = 172.4255, GNorm = 0.1048, lr_0 = 1.2494e-04
Loss = 1.0004e-03, PNorm = 172.4259, GNorm = 0.0376, lr_0 = 1.2485e-04
Loss = 7.6897e-04, PNorm = 172.4273, GNorm = 0.0882, lr_0 = 1.2477e-04
Loss = 2.3891e-03, PNorm = 172.4274, GNorm = 0.1456, lr_0 = 1.2468e-04
Loss = 5.9660e-04, PNorm = 172.4292, GNorm = 0.1143, lr_0 = 1.2460e-04
Loss = 9.1300e-04, PNorm = 172.4312, GNorm = 0.0731, lr_0 = 1.2451e-04
Loss = 1.0369e-03, PNorm = 172.4336, GNorm = 0.1309, lr_0 = 1.2443e-04
Loss = 7.2662e-04, PNorm = 172.4365, GNorm = 0.0378, lr_0 = 1.2434e-04
Loss = 2.3358e-03, PNorm = 172.4373, GNorm = 0.0866, lr_0 = 1.2426e-04
Loss = 1.3599e-03, PNorm = 172.4388, GNorm = 0.0531, lr_0 = 1.2417e-04
Loss = 2.7421e-03, PNorm = 172.4403, GNorm = 0.0534, lr_0 = 1.2409e-04
Loss = 7.7789e-04, PNorm = 172.4418, GNorm = 0.1548, lr_0 = 1.2400e-04
Loss = 8.2730e-04, PNorm = 172.4438, GNorm = 0.0624, lr_0 = 1.2392e-04
Loss = 1.1294e-03, PNorm = 172.4454, GNorm = 0.0543, lr_0 = 1.2383e-04
Loss = 7.5943e-04, PNorm = 172.4461, GNorm = 0.1413, lr_0 = 1.2375e-04
Loss = 1.9475e-03, PNorm = 172.4467, GNorm = 0.1017, lr_0 = 1.2366e-04
Loss = 1.6891e-03, PNorm = 172.4479, GNorm = 0.2533, lr_0 = 1.2358e-04
Loss = 9.6279e-04, PNorm = 172.4504, GNorm = 0.0444, lr_0 = 1.2349e-04
Loss = 2.2202e-03, PNorm = 172.4535, GNorm = 0.0886, lr_0 = 1.2341e-04
Loss = 7.7149e-04, PNorm = 172.4541, GNorm = 0.1478, lr_0 = 1.2332e-04
Loss = 2.3098e-03, PNorm = 172.4540, GNorm = 0.2610, lr_0 = 1.2324e-04
Loss = 8.0529e-04, PNorm = 172.4529, GNorm = 0.1511, lr_0 = 1.2315e-04
Loss = 3.0167e-03, PNorm = 172.4550, GNorm = 0.2925, lr_0 = 1.2307e-04
Loss = 1.1425e-03, PNorm = 172.4566, GNorm = 0.0933, lr_0 = 1.2298e-04
Loss = 1.4538e-03, PNorm = 172.4601, GNorm = 0.0989, lr_0 = 1.2290e-04
Loss = 2.2644e-03, PNorm = 172.4619, GNorm = 0.2265, lr_0 = 1.2282e-04
Loss = 1.0640e-03, PNorm = 172.4643, GNorm = 0.0666, lr_0 = 1.2273e-04
Loss = 7.0648e-04, PNorm = 172.4658, GNorm = 0.0882, lr_0 = 1.2265e-04
Loss = 1.7645e-03, PNorm = 172.4691, GNorm = 0.2611, lr_0 = 1.2256e-04
Loss = 7.7240e-04, PNorm = 172.4711, GNorm = 0.1543, lr_0 = 1.2248e-04
Loss = 9.3645e-04, PNorm = 172.4712, GNorm = 0.1485, lr_0 = 1.2240e-04
Loss = 1.6380e-03, PNorm = 172.4724, GNorm = 0.0729, lr_0 = 1.2231e-04
Loss = 9.6597e-04, PNorm = 172.4730, GNorm = 0.0664, lr_0 = 1.2223e-04
Loss = 1.2996e-03, PNorm = 172.4760, GNorm = 0.2019, lr_0 = 1.2214e-04
Loss = 1.6008e-03, PNorm = 172.4803, GNorm = 0.1404, lr_0 = 1.2206e-04
Loss = 2.2155e-03, PNorm = 172.4823, GNorm = 0.1242, lr_0 = 1.2198e-04
Loss = 8.3572e-04, PNorm = 172.4846, GNorm = 0.1823, lr_0 = 1.2189e-04
Loss = 8.5779e-04, PNorm = 172.4866, GNorm = 0.0509, lr_0 = 1.2181e-04
Loss = 6.8419e-04, PNorm = 172.4870, GNorm = 0.0483, lr_0 = 1.2173e-04
Loss = 1.6579e-03, PNorm = 172.4863, GNorm = 0.0585, lr_0 = 1.2164e-04
Loss = 3.5489e-03, PNorm = 172.4854, GNorm = 0.1723, lr_0 = 1.2156e-04
Loss = 1.2840e-03, PNorm = 172.4858, GNorm = 0.1434, lr_0 = 1.2148e-04
Loss = 8.2795e-04, PNorm = 172.4867, GNorm = 0.0651, lr_0 = 1.2139e-04
Loss = 1.2874e-03, PNorm = 172.4890, GNorm = 0.0748, lr_0 = 1.2131e-04
Loss = 9.4648e-04, PNorm = 172.4910, GNorm = 0.0471, lr_0 = 1.2123e-04
Loss = 1.5676e-03, PNorm = 172.4927, GNorm = 0.0393, lr_0 = 1.2114e-04
Loss = 1.9056e-03, PNorm = 172.4941, GNorm = 0.1312, lr_0 = 1.2106e-04
Loss = 9.0895e-04, PNorm = 172.4981, GNorm = 0.0824, lr_0 = 1.2098e-04
Loss = 1.4813e-03, PNorm = 172.5013, GNorm = 0.0455, lr_0 = 1.2090e-04
Loss = 2.4445e-03, PNorm = 172.5039, GNorm = 0.2961, lr_0 = 1.2081e-04
Loss = 1.4639e-03, PNorm = 172.5059, GNorm = 0.0317, lr_0 = 1.2073e-04
Loss = 1.5791e-03, PNorm = 172.5049, GNorm = 0.1051, lr_0 = 1.2065e-04
Loss = 1.1859e-03, PNorm = 172.5063, GNorm = 0.0676, lr_0 = 1.2056e-04
Loss = 2.2711e-03, PNorm = 172.5106, GNorm = 0.0772, lr_0 = 1.2048e-04
Loss = 1.7303e-03, PNorm = 172.5126, GNorm = 0.0524, lr_0 = 1.2040e-04
Loss = 2.8265e-03, PNorm = 172.5149, GNorm = 0.1123, lr_0 = 1.2032e-04
Loss = 1.8279e-03, PNorm = 172.5167, GNorm = 0.1276, lr_0 = 1.2023e-04
Loss = 1.1633e-03, PNorm = 172.5181, GNorm = 0.1273, lr_0 = 1.2015e-04
Loss = 6.7701e-04, PNorm = 172.5199, GNorm = 0.0865, lr_0 = 1.2007e-04
Loss = 2.0547e-03, PNorm = 172.5206, GNorm = 0.1049, lr_0 = 1.1999e-04
Loss = 2.5638e-03, PNorm = 172.5223, GNorm = 0.2952, lr_0 = 1.1991e-04
Loss = 8.0637e-04, PNorm = 172.5236, GNorm = 0.0232, lr_0 = 1.1982e-04
Loss = 3.3841e-03, PNorm = 172.5267, GNorm = 0.0562, lr_0 = 1.1974e-04
Loss = 1.6079e-03, PNorm = 172.5294, GNorm = 0.1718, lr_0 = 1.1966e-04
Loss = 1.5093e-03, PNorm = 172.5313, GNorm = 0.0473, lr_0 = 1.1958e-04
Loss = 7.6940e-04, PNorm = 172.5325, GNorm = 0.1839, lr_0 = 1.1950e-04
Loss = 6.6750e-04, PNorm = 172.5336, GNorm = 0.0392, lr_0 = 1.1941e-04
Loss = 6.6821e-04, PNorm = 172.5342, GNorm = 0.0937, lr_0 = 1.1933e-04
Loss = 1.0875e-03, PNorm = 172.5333, GNorm = 0.1828, lr_0 = 1.1925e-04
Loss = 1.1615e-03, PNorm = 172.5337, GNorm = 0.0947, lr_0 = 1.1917e-04
Loss = 1.1677e-03, PNorm = 172.5355, GNorm = 0.1272, lr_0 = 1.1909e-04
Loss = 7.8985e-04, PNorm = 172.5369, GNorm = 0.2337, lr_0 = 1.1901e-04
Loss = 6.6048e-04, PNorm = 172.5376, GNorm = 0.0529, lr_0 = 1.1892e-04
Loss = 1.2191e-03, PNorm = 172.5380, GNorm = 0.1097, lr_0 = 1.1884e-04
Loss = 2.0509e-03, PNorm = 172.5395, GNorm = 0.0476, lr_0 = 1.1876e-04
Loss = 1.0541e-03, PNorm = 172.5418, GNorm = 0.0793, lr_0 = 1.1868e-04
Loss = 1.7395e-03, PNorm = 172.5436, GNorm = 0.1266, lr_0 = 1.1860e-04
Loss = 1.1113e-03, PNorm = 172.5447, GNorm = 0.0671, lr_0 = 1.1852e-04
Loss = 1.1440e-03, PNorm = 172.5453, GNorm = 0.0511, lr_0 = 1.1844e-04
Loss = 7.1472e-04, PNorm = 172.5455, GNorm = 0.0717, lr_0 = 1.1835e-04
Loss = 8.9420e-04, PNorm = 172.5466, GNorm = 0.1119, lr_0 = 1.1827e-04
Loss = 1.5071e-03, PNorm = 172.5478, GNorm = 0.1323, lr_0 = 1.1819e-04
Loss = 2.4908e-03, PNorm = 172.5490, GNorm = 0.0452, lr_0 = 1.1811e-04
Loss = 9.0571e-04, PNorm = 172.5505, GNorm = 0.0677, lr_0 = 1.1803e-04
Loss = 9.8878e-04, PNorm = 172.5528, GNorm = 0.0600, lr_0 = 1.1795e-04
Loss = 1.6017e-03, PNorm = 172.5554, GNorm = 0.0448, lr_0 = 1.1787e-04
Validation mae = 0.277861
Epoch 28
Loss = 1.2082e-03, PNorm = 172.5580, GNorm = 0.0705, lr_0 = 1.1779e-04
Loss = 9.5844e-04, PNorm = 172.5601, GNorm = 0.1178, lr_0 = 1.1771e-04
Loss = 5.5181e-04, PNorm = 172.5615, GNorm = 0.1073, lr_0 = 1.1763e-04
Loss = 7.7075e-04, PNorm = 172.5629, GNorm = 0.1061, lr_0 = 1.1755e-04
Loss = 1.9851e-03, PNorm = 172.5629, GNorm = 0.1602, lr_0 = 1.1747e-04
Loss = 1.4202e-03, PNorm = 172.5624, GNorm = 0.0359, lr_0 = 1.1739e-04
Loss = 8.1193e-04, PNorm = 172.5631, GNorm = 0.0755, lr_0 = 1.1730e-04
Loss = 5.9408e-04, PNorm = 172.5637, GNorm = 0.0687, lr_0 = 1.1722e-04
Loss = 6.8749e-04, PNorm = 172.5652, GNorm = 0.0328, lr_0 = 1.1714e-04
Loss = 2.5723e-03, PNorm = 172.5672, GNorm = 0.0800, lr_0 = 1.1706e-04
Loss = 5.6098e-04, PNorm = 172.5685, GNorm = 0.0904, lr_0 = 1.1698e-04
Loss = 1.3582e-03, PNorm = 172.5699, GNorm = 0.2473, lr_0 = 1.1690e-04
Loss = 6.3050e-04, PNorm = 172.5706, GNorm = 0.0662, lr_0 = 1.1682e-04
Loss = 1.4137e-03, PNorm = 172.5717, GNorm = 0.1022, lr_0 = 1.1674e-04
Loss = 6.0595e-04, PNorm = 172.5734, GNorm = 0.0633, lr_0 = 1.1666e-04
Loss = 5.6002e-04, PNorm = 172.5747, GNorm = 0.0443, lr_0 = 1.1658e-04
Loss = 2.9209e-03, PNorm = 172.5752, GNorm = 0.0420, lr_0 = 1.1650e-04
Loss = 1.1790e-03, PNorm = 172.5770, GNorm = 0.0815, lr_0 = 1.1642e-04
Loss = 6.3194e-04, PNorm = 172.5781, GNorm = 0.0651, lr_0 = 1.1634e-04
Loss = 1.3063e-03, PNorm = 172.5788, GNorm = 0.0338, lr_0 = 1.1626e-04
Loss = 7.7545e-04, PNorm = 172.5801, GNorm = 0.1276, lr_0 = 1.1618e-04
Loss = 1.9137e-03, PNorm = 172.5803, GNorm = 0.0537, lr_0 = 1.1611e-04
Loss = 6.1999e-04, PNorm = 172.5815, GNorm = 0.0978, lr_0 = 1.1603e-04
Loss = 1.9210e-03, PNorm = 172.5834, GNorm = 0.0918, lr_0 = 1.1595e-04
Loss = 5.6065e-04, PNorm = 172.5847, GNorm = 0.0374, lr_0 = 1.1587e-04
Loss = 8.3126e-04, PNorm = 172.5856, GNorm = 0.1087, lr_0 = 1.1579e-04
Loss = 6.0587e-04, PNorm = 172.5863, GNorm = 0.0266, lr_0 = 1.1571e-04
Loss = 9.5483e-04, PNorm = 172.5884, GNorm = 0.0439, lr_0 = 1.1563e-04
Loss = 6.6676e-04, PNorm = 172.5884, GNorm = 0.1719, lr_0 = 1.1555e-04
Loss = 4.3806e-03, PNorm = 172.5900, GNorm = 0.0491, lr_0 = 1.1547e-04
Loss = 9.1329e-04, PNorm = 172.5907, GNorm = 0.0356, lr_0 = 1.1539e-04
Loss = 5.4738e-04, PNorm = 172.5912, GNorm = 0.0552, lr_0 = 1.1531e-04
Loss = 1.9116e-03, PNorm = 172.5916, GNorm = 0.0716, lr_0 = 1.1523e-04
Loss = 1.2288e-03, PNorm = 172.5912, GNorm = 0.4473, lr_0 = 1.1515e-04
Loss = 7.0901e-04, PNorm = 172.5934, GNorm = 0.0845, lr_0 = 1.1508e-04
Loss = 1.6911e-03, PNorm = 172.5947, GNorm = 0.0435, lr_0 = 1.1500e-04
Loss = 4.0099e-03, PNorm = 172.5956, GNorm = 0.0575, lr_0 = 1.1492e-04
Loss = 1.2684e-03, PNorm = 172.5954, GNorm = 0.1328, lr_0 = 1.1484e-04
Loss = 8.3271e-04, PNorm = 172.5976, GNorm = 0.1505, lr_0 = 1.1476e-04
Loss = 7.8925e-04, PNorm = 172.6002, GNorm = 0.0833, lr_0 = 1.1468e-04
Loss = 3.3258e-03, PNorm = 172.6018, GNorm = 0.0496, lr_0 = 1.1460e-04
Loss = 1.4672e-03, PNorm = 172.6038, GNorm = 0.1319, lr_0 = 1.1452e-04
Loss = 5.3076e-04, PNorm = 172.6039, GNorm = 0.0794, lr_0 = 1.1445e-04
Loss = 7.8570e-04, PNorm = 172.6048, GNorm = 0.1312, lr_0 = 1.1437e-04
Loss = 8.6241e-04, PNorm = 172.6062, GNorm = 0.0448, lr_0 = 1.1429e-04
Loss = 6.2694e-04, PNorm = 172.6078, GNorm = 0.0745, lr_0 = 1.1421e-04
Loss = 9.2429e-04, PNorm = 172.6091, GNorm = 0.1577, lr_0 = 1.1413e-04
Loss = 8.8134e-04, PNorm = 172.6093, GNorm = 0.0576, lr_0 = 1.1405e-04
Loss = 1.3715e-03, PNorm = 172.6098, GNorm = 0.0888, lr_0 = 1.1398e-04
Loss = 1.0042e-03, PNorm = 172.6120, GNorm = 0.1546, lr_0 = 1.1390e-04
Loss = 2.2847e-03, PNorm = 172.6137, GNorm = 0.0185, lr_0 = 1.1382e-04
Loss = 2.4229e-03, PNorm = 172.6165, GNorm = 0.0772, lr_0 = 1.1374e-04
Loss = 1.8059e-03, PNorm = 172.6178, GNorm = 0.1386, lr_0 = 1.1366e-04
Loss = 7.5185e-04, PNorm = 172.6188, GNorm = 0.0839, lr_0 = 1.1359e-04
Loss = 1.3074e-03, PNorm = 172.6194, GNorm = 0.0648, lr_0 = 1.1351e-04
Loss = 1.8279e-03, PNorm = 172.6199, GNorm = 0.0350, lr_0 = 1.1343e-04
Loss = 8.3264e-04, PNorm = 172.6200, GNorm = 0.0589, lr_0 = 1.1335e-04
Loss = 1.2203e-03, PNorm = 172.6207, GNorm = 0.0462, lr_0 = 1.1328e-04
Loss = 1.4338e-03, PNorm = 172.6227, GNorm = 0.2409, lr_0 = 1.1320e-04
Loss = 8.7355e-04, PNorm = 172.6253, GNorm = 0.0830, lr_0 = 1.1312e-04
Loss = 6.2497e-04, PNorm = 172.6260, GNorm = 0.0897, lr_0 = 1.1304e-04
Loss = 7.0451e-04, PNorm = 172.6276, GNorm = 0.1281, lr_0 = 1.1297e-04
Loss = 6.5404e-04, PNorm = 172.6294, GNorm = 0.0882, lr_0 = 1.1289e-04
Loss = 9.9763e-04, PNorm = 172.6302, GNorm = 0.1414, lr_0 = 1.1281e-04
Loss = 2.9860e-03, PNorm = 172.6311, GNorm = 0.0559, lr_0 = 1.1273e-04
Loss = 1.5157e-03, PNorm = 172.6312, GNorm = 0.0644, lr_0 = 1.1266e-04
Loss = 1.1631e-03, PNorm = 172.6324, GNorm = 0.0457, lr_0 = 1.1258e-04
Loss = 2.3561e-03, PNorm = 172.6346, GNorm = 0.1401, lr_0 = 1.1250e-04
Loss = 7.2015e-04, PNorm = 172.6372, GNorm = 0.1362, lr_0 = 1.1243e-04
Loss = 2.2101e-03, PNorm = 172.6397, GNorm = 0.0600, lr_0 = 1.1235e-04
Loss = 1.5552e-03, PNorm = 172.6416, GNorm = 0.2023, lr_0 = 1.1227e-04
Loss = 7.4332e-04, PNorm = 172.6440, GNorm = 0.1483, lr_0 = 1.1219e-04
Loss = 8.2835e-04, PNorm = 172.6459, GNorm = 0.1025, lr_0 = 1.1212e-04
Loss = 1.5211e-03, PNorm = 172.6472, GNorm = 0.0753, lr_0 = 1.1204e-04
Loss = 2.2413e-03, PNorm = 172.6482, GNorm = 0.0510, lr_0 = 1.1196e-04
Loss = 6.6044e-04, PNorm = 172.6489, GNorm = 0.1246, lr_0 = 1.1189e-04
Loss = 6.0336e-04, PNorm = 172.6501, GNorm = 0.1125, lr_0 = 1.1181e-04
Loss = 1.8874e-03, PNorm = 172.6498, GNorm = 0.2476, lr_0 = 1.1173e-04
Loss = 1.3313e-03, PNorm = 172.6513, GNorm = 0.0997, lr_0 = 1.1166e-04
Loss = 1.0108e-03, PNorm = 172.6518, GNorm = 0.0412, lr_0 = 1.1158e-04
Loss = 7.4167e-04, PNorm = 172.6542, GNorm = 0.1050, lr_0 = 1.1150e-04
Loss = 1.9952e-03, PNorm = 172.6559, GNorm = 0.0826, lr_0 = 1.1143e-04
Loss = 8.2735e-04, PNorm = 172.6576, GNorm = 0.0556, lr_0 = 1.1135e-04
Loss = 8.0423e-04, PNorm = 172.6594, GNorm = 0.0923, lr_0 = 1.1128e-04
Loss = 1.6270e-03, PNorm = 172.6609, GNorm = 0.0968, lr_0 = 1.1120e-04
Loss = 2.2413e-03, PNorm = 172.6632, GNorm = 0.1357, lr_0 = 1.1112e-04
Loss = 1.2635e-03, PNorm = 172.6666, GNorm = 0.0772, lr_0 = 1.1105e-04
Loss = 2.6483e-03, PNorm = 172.6688, GNorm = 0.1207, lr_0 = 1.1097e-04
Loss = 2.1676e-03, PNorm = 172.6697, GNorm = 0.0926, lr_0 = 1.1089e-04
Loss = 6.8281e-04, PNorm = 172.6715, GNorm = 0.0731, lr_0 = 1.1082e-04
Loss = 1.3357e-03, PNorm = 172.6736, GNorm = 0.2786, lr_0 = 1.1074e-04
Loss = 1.5752e-03, PNorm = 172.6741, GNorm = 0.2070, lr_0 = 1.1067e-04
Loss = 1.0662e-03, PNorm = 172.6747, GNorm = 0.0709, lr_0 = 1.1059e-04
Loss = 2.2158e-03, PNorm = 172.6754, GNorm = 0.0639, lr_0 = 1.1052e-04
Loss = 7.5536e-04, PNorm = 172.6769, GNorm = 0.0449, lr_0 = 1.1044e-04
Loss = 8.2547e-04, PNorm = 172.6782, GNorm = 0.0588, lr_0 = 1.1036e-04
Loss = 6.5514e-04, PNorm = 172.6806, GNorm = 0.0835, lr_0 = 1.1029e-04
Loss = 5.9465e-04, PNorm = 172.6837, GNorm = 0.1287, lr_0 = 1.1021e-04
Loss = 7.0536e-04, PNorm = 172.6851, GNorm = 0.0550, lr_0 = 1.1014e-04
Loss = 1.5556e-03, PNorm = 172.6863, GNorm = 0.1186, lr_0 = 1.1006e-04
Loss = 1.0142e-03, PNorm = 172.6879, GNorm = 0.0489, lr_0 = 1.0999e-04
Loss = 3.5943e-03, PNorm = 172.6884, GNorm = 0.1748, lr_0 = 1.0991e-04
Loss = 5.9099e-04, PNorm = 172.6896, GNorm = 0.1275, lr_0 = 1.0984e-04
Loss = 1.5127e-03, PNorm = 172.6909, GNorm = 0.2938, lr_0 = 1.0976e-04
Loss = 9.6659e-04, PNorm = 172.6923, GNorm = 0.0546, lr_0 = 1.0969e-04
Loss = 3.6591e-03, PNorm = 172.6944, GNorm = 0.0697, lr_0 = 1.0961e-04
Loss = 1.4269e-03, PNorm = 172.6978, GNorm = 0.1977, lr_0 = 1.0954e-04
Loss = 1.4239e-03, PNorm = 172.7004, GNorm = 0.0737, lr_0 = 1.0946e-04
Loss = 6.6767e-04, PNorm = 172.7013, GNorm = 0.0489, lr_0 = 1.0939e-04
Loss = 8.7061e-04, PNorm = 172.7021, GNorm = 0.0702, lr_0 = 1.0931e-04
Loss = 7.0842e-04, PNorm = 172.7024, GNorm = 0.0789, lr_0 = 1.0924e-04
Loss = 2.8701e-03, PNorm = 172.7025, GNorm = 0.0325, lr_0 = 1.0916e-04
Loss = 8.1764e-04, PNorm = 172.7044, GNorm = 0.0774, lr_0 = 1.0909e-04
Loss = 1.3270e-03, PNorm = 172.7060, GNorm = 0.1598, lr_0 = 1.0901e-04
Loss = 1.0643e-03, PNorm = 172.7086, GNorm = 0.0754, lr_0 = 1.0894e-04
Loss = 8.0657e-04, PNorm = 172.7106, GNorm = 0.1466, lr_0 = 1.0886e-04
Loss = 2.1806e-03, PNorm = 172.7127, GNorm = 0.0814, lr_0 = 1.0879e-04
Loss = 8.8405e-04, PNorm = 172.7145, GNorm = 0.0234, lr_0 = 1.0871e-04
Loss = 8.5952e-04, PNorm = 172.7163, GNorm = 0.1189, lr_0 = 1.0864e-04
Loss = 1.0922e-03, PNorm = 172.7183, GNorm = 0.0711, lr_0 = 1.0856e-04
Validation mae = 0.277903
Epoch 29
Loss = 3.9761e-03, PNorm = 172.7188, GNorm = 0.2249, lr_0 = 1.0849e-04
Loss = 6.0726e-04, PNorm = 172.7191, GNorm = 0.0475, lr_0 = 1.0841e-04
Loss = 7.8911e-04, PNorm = 172.7189, GNorm = 0.1331, lr_0 = 1.0834e-04
Loss = 7.1373e-04, PNorm = 172.7193, GNorm = 0.1041, lr_0 = 1.0827e-04
Loss = 1.9394e-03, PNorm = 172.7190, GNorm = 0.2027, lr_0 = 1.0819e-04
Loss = 9.8372e-04, PNorm = 172.7196, GNorm = 0.1325, lr_0 = 1.0812e-04
Loss = 5.2880e-04, PNorm = 172.7198, GNorm = 0.1168, lr_0 = 1.0804e-04
Loss = 5.6499e-04, PNorm = 172.7202, GNorm = 0.1695, lr_0 = 1.0797e-04
Loss = 2.1190e-03, PNorm = 172.7224, GNorm = 0.0832, lr_0 = 1.0790e-04
Loss = 7.2706e-04, PNorm = 172.7244, GNorm = 0.0440, lr_0 = 1.0782e-04
Loss = 2.2529e-03, PNorm = 172.7255, GNorm = 0.0549, lr_0 = 1.0775e-04
Loss = 1.3951e-03, PNorm = 172.7272, GNorm = 0.0718, lr_0 = 1.0767e-04
Loss = 5.1106e-04, PNorm = 172.7274, GNorm = 0.0608, lr_0 = 1.0760e-04
Loss = 1.2697e-03, PNorm = 172.7278, GNorm = 0.1188, lr_0 = 1.0753e-04
Loss = 4.7048e-04, PNorm = 172.7291, GNorm = 0.1494, lr_0 = 1.0745e-04
Loss = 1.4763e-03, PNorm = 172.7299, GNorm = 0.0266, lr_0 = 1.0738e-04
Loss = 7.2037e-04, PNorm = 172.7307, GNorm = 0.1010, lr_0 = 1.0731e-04
Loss = 6.0069e-04, PNorm = 172.7323, GNorm = 0.0419, lr_0 = 1.0723e-04
Loss = 1.8050e-03, PNorm = 172.7333, GNorm = 0.0682, lr_0 = 1.0716e-04
Loss = 5.6746e-04, PNorm = 172.7343, GNorm = 0.0499, lr_0 = 1.0709e-04
Loss = 1.8298e-03, PNorm = 172.7354, GNorm = 0.0779, lr_0 = 1.0701e-04
Loss = 9.5747e-04, PNorm = 172.7365, GNorm = 0.1040, lr_0 = 1.0694e-04
Loss = 4.9262e-04, PNorm = 172.7381, GNorm = 0.0389, lr_0 = 1.0687e-04
Loss = 6.3776e-04, PNorm = 172.7385, GNorm = 0.1206, lr_0 = 1.0679e-04
Loss = 5.8979e-04, PNorm = 172.7391, GNorm = 0.0665, lr_0 = 1.0672e-04
Loss = 2.0233e-03, PNorm = 172.7379, GNorm = 0.3558, lr_0 = 1.0665e-04
Loss = 6.4146e-04, PNorm = 172.7375, GNorm = 0.0477, lr_0 = 1.0657e-04
Loss = 2.3589e-03, PNorm = 172.7382, GNorm = 0.1325, lr_0 = 1.0650e-04
Loss = 1.0397e-03, PNorm = 172.7379, GNorm = 0.0294, lr_0 = 1.0643e-04
Loss = 4.2877e-04, PNorm = 172.7385, GNorm = 0.0609, lr_0 = 1.0635e-04
Loss = 5.7625e-04, PNorm = 172.7399, GNorm = 0.0626, lr_0 = 1.0628e-04
Loss = 5.6871e-04, PNorm = 172.7410, GNorm = 0.1372, lr_0 = 1.0621e-04
Loss = 1.4692e-03, PNorm = 172.7430, GNorm = 0.0755, lr_0 = 1.0614e-04
Loss = 8.4513e-04, PNorm = 172.7441, GNorm = 0.0277, lr_0 = 1.0606e-04
Loss = 5.8593e-04, PNorm = 172.7442, GNorm = 0.0764, lr_0 = 1.0599e-04
Loss = 1.5275e-03, PNorm = 172.7448, GNorm = 0.0902, lr_0 = 1.0592e-04
Loss = 7.1260e-04, PNorm = 172.7449, GNorm = 0.1045, lr_0 = 1.0585e-04
Loss = 1.9382e-03, PNorm = 172.7460, GNorm = 0.2850, lr_0 = 1.0577e-04
Loss = 1.2271e-03, PNorm = 172.7486, GNorm = 0.0463, lr_0 = 1.0570e-04
Loss = 9.7556e-04, PNorm = 172.7519, GNorm = 0.0350, lr_0 = 1.0563e-04
Loss = 9.5367e-04, PNorm = 172.7523, GNorm = 0.0663, lr_0 = 1.0556e-04
Loss = 5.8135e-04, PNorm = 172.7521, GNorm = 0.0759, lr_0 = 1.0548e-04
Loss = 6.5510e-04, PNorm = 172.7532, GNorm = 0.0215, lr_0 = 1.0541e-04
Loss = 5.2761e-04, PNorm = 172.7549, GNorm = 0.0994, lr_0 = 1.0534e-04
Loss = 2.6853e-03, PNorm = 172.7555, GNorm = 0.0533, lr_0 = 1.0527e-04
Loss = 4.4530e-04, PNorm = 172.7571, GNorm = 0.0771, lr_0 = 1.0519e-04
Loss = 2.2267e-03, PNorm = 172.7586, GNorm = 0.0531, lr_0 = 1.0512e-04
Loss = 5.6410e-04, PNorm = 172.7604, GNorm = 0.0748, lr_0 = 1.0505e-04
Loss = 5.9589e-04, PNorm = 172.7627, GNorm = 0.1313, lr_0 = 1.0498e-04
Loss = 1.0596e-03, PNorm = 172.7643, GNorm = 0.0416, lr_0 = 1.0491e-04
Loss = 1.0515e-03, PNorm = 172.7663, GNorm = 0.0835, lr_0 = 1.0483e-04
Loss = 1.1272e-03, PNorm = 172.7676, GNorm = 0.0535, lr_0 = 1.0476e-04
Loss = 1.7655e-03, PNorm = 172.7687, GNorm = 0.0358, lr_0 = 1.0469e-04
Loss = 5.8530e-04, PNorm = 172.7706, GNorm = 0.1164, lr_0 = 1.0462e-04
Loss = 8.7560e-04, PNorm = 172.7718, GNorm = 0.0396, lr_0 = 1.0455e-04
Loss = 1.7872e-03, PNorm = 172.7730, GNorm = 0.0966, lr_0 = 1.0448e-04
Loss = 1.6400e-03, PNorm = 172.7743, GNorm = 0.1132, lr_0 = 1.0440e-04
Loss = 1.5375e-03, PNorm = 172.7761, GNorm = 0.1583, lr_0 = 1.0433e-04
Loss = 1.0775e-03, PNorm = 172.7777, GNorm = 0.0414, lr_0 = 1.0426e-04
Loss = 1.2057e-03, PNorm = 172.7784, GNorm = 0.0879, lr_0 = 1.0419e-04
Loss = 8.0722e-04, PNorm = 172.7789, GNorm = 0.1069, lr_0 = 1.0412e-04
Loss = 1.1043e-03, PNorm = 172.7799, GNorm = 0.0932, lr_0 = 1.0405e-04
Loss = 6.2684e-04, PNorm = 172.7818, GNorm = 0.0937, lr_0 = 1.0398e-04
Loss = 8.5775e-04, PNorm = 172.7833, GNorm = 0.0414, lr_0 = 1.0391e-04
Loss = 1.1868e-03, PNorm = 172.7859, GNorm = 0.0609, lr_0 = 1.0383e-04
Loss = 1.4204e-03, PNorm = 172.7871, GNorm = 0.0587, lr_0 = 1.0376e-04
Loss = 1.1357e-03, PNorm = 172.7875, GNorm = 0.0456, lr_0 = 1.0369e-04
Loss = 1.1373e-03, PNorm = 172.7872, GNorm = 0.0926, lr_0 = 1.0362e-04
Loss = 6.3367e-04, PNorm = 172.7876, GNorm = 0.1206, lr_0 = 1.0355e-04
Loss = 1.7357e-03, PNorm = 172.7893, GNorm = 0.0857, lr_0 = 1.0348e-04
Loss = 1.6328e-03, PNorm = 172.7907, GNorm = 0.1777, lr_0 = 1.0341e-04
Loss = 1.5094e-03, PNorm = 172.7919, GNorm = 0.0307, lr_0 = 1.0334e-04
Loss = 7.9590e-04, PNorm = 172.7929, GNorm = 0.0231, lr_0 = 1.0327e-04
Loss = 9.1511e-04, PNorm = 172.7953, GNorm = 0.2148, lr_0 = 1.0320e-04
Loss = 1.7966e-03, PNorm = 172.7965, GNorm = 0.0445, lr_0 = 1.0312e-04
Loss = 1.0814e-03, PNorm = 172.7979, GNorm = 0.1642, lr_0 = 1.0305e-04
Loss = 4.2602e-04, PNorm = 172.7987, GNorm = 0.0734, lr_0 = 1.0298e-04
Loss = 9.8080e-04, PNorm = 172.8000, GNorm = 0.1236, lr_0 = 1.0291e-04
Loss = 7.5598e-04, PNorm = 172.8023, GNorm = 0.0425, lr_0 = 1.0284e-04
Loss = 1.2223e-03, PNorm = 172.8040, GNorm = 0.1043, lr_0 = 1.0277e-04
Loss = 1.9637e-03, PNorm = 172.8045, GNorm = 0.0915, lr_0 = 1.0270e-04
Loss = 2.5492e-03, PNorm = 172.8068, GNorm = 0.2862, lr_0 = 1.0263e-04
Loss = 8.2958e-04, PNorm = 172.8086, GNorm = 0.0904, lr_0 = 1.0256e-04
Loss = 9.0131e-04, PNorm = 172.8090, GNorm = 0.0719, lr_0 = 1.0249e-04
Loss = 8.9674e-04, PNorm = 172.8092, GNorm = 0.0857, lr_0 = 1.0242e-04
Loss = 7.4210e-04, PNorm = 172.8093, GNorm = 0.0980, lr_0 = 1.0235e-04
Loss = 5.0415e-04, PNorm = 172.8100, GNorm = 0.0498, lr_0 = 1.0228e-04
Loss = 6.3273e-04, PNorm = 172.8118, GNorm = 0.0494, lr_0 = 1.0221e-04
Loss = 1.2233e-03, PNorm = 172.8125, GNorm = 0.0594, lr_0 = 1.0214e-04
Loss = 7.1394e-04, PNorm = 172.8139, GNorm = 0.0666, lr_0 = 1.0207e-04
Loss = 1.7796e-03, PNorm = 172.8149, GNorm = 0.0474, lr_0 = 1.0200e-04
Loss = 1.5644e-03, PNorm = 172.8160, GNorm = 0.1400, lr_0 = 1.0193e-04
Loss = 1.0160e-03, PNorm = 172.8166, GNorm = 0.0370, lr_0 = 1.0186e-04
Loss = 8.1957e-04, PNorm = 172.8170, GNorm = 0.0509, lr_0 = 1.0179e-04
Loss = 1.3194e-03, PNorm = 172.8193, GNorm = 0.0574, lr_0 = 1.0172e-04
Loss = 1.0813e-03, PNorm = 172.8198, GNorm = 0.0745, lr_0 = 1.0165e-04
Loss = 1.1518e-03, PNorm = 172.8224, GNorm = 0.0732, lr_0 = 1.0158e-04
Loss = 6.1906e-04, PNorm = 172.8242, GNorm = 0.1190, lr_0 = 1.0151e-04
Loss = 2.2362e-03, PNorm = 172.8257, GNorm = 0.0274, lr_0 = 1.0144e-04
Loss = 1.5510e-03, PNorm = 172.8274, GNorm = 0.0355, lr_0 = 1.0137e-04
Loss = 1.7333e-03, PNorm = 172.8287, GNorm = 0.0823, lr_0 = 1.0130e-04
Loss = 3.0916e-03, PNorm = 172.8299, GNorm = 0.0362, lr_0 = 1.0123e-04
Loss = 1.9753e-03, PNorm = 172.8310, GNorm = 0.1615, lr_0 = 1.0116e-04
Loss = 1.7063e-03, PNorm = 172.8328, GNorm = 0.0566, lr_0 = 1.0110e-04
Loss = 1.5661e-03, PNorm = 172.8331, GNorm = 0.1092, lr_0 = 1.0103e-04
Loss = 3.5303e-03, PNorm = 172.8331, GNorm = 0.0851, lr_0 = 1.0096e-04
Loss = 6.1820e-04, PNorm = 172.8342, GNorm = 0.0821, lr_0 = 1.0089e-04
Loss = 2.4789e-03, PNorm = 172.8346, GNorm = 0.0956, lr_0 = 1.0082e-04
Loss = 1.6823e-03, PNorm = 172.8348, GNorm = 0.0208, lr_0 = 1.0075e-04
Loss = 1.0243e-03, PNorm = 172.8362, GNorm = 0.0821, lr_0 = 1.0068e-04
Loss = 6.8182e-04, PNorm = 172.8380, GNorm = 0.1182, lr_0 = 1.0061e-04
Loss = 1.5653e-03, PNorm = 172.8398, GNorm = 0.0840, lr_0 = 1.0054e-04
Loss = 6.4032e-04, PNorm = 172.8407, GNorm = 0.0950, lr_0 = 1.0047e-04
Loss = 6.5801e-04, PNorm = 172.8428, GNorm = 0.0536, lr_0 = 1.0041e-04
Loss = 4.8604e-04, PNorm = 172.8432, GNorm = 0.0689, lr_0 = 1.0034e-04
Loss = 2.3887e-03, PNorm = 172.8449, GNorm = 0.0667, lr_0 = 1.0027e-04
Loss = 7.3701e-04, PNorm = 172.8458, GNorm = 0.0910, lr_0 = 1.0020e-04
Loss = 1.4297e-03, PNorm = 172.8481, GNorm = 0.0581, lr_0 = 1.0013e-04
Loss = 7.5029e-04, PNorm = 172.8493, GNorm = 0.0859, lr_0 = 1.0006e-04
Loss = 6.5823e-04, PNorm = 172.8503, GNorm = 0.0638, lr_0 = 1.0000e-04
Validation mae = 0.277831
Model 0 best validation mae = 0.277742 on epoch 26
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.272474
Ensemble test mae = 0.272474
Fold 7
Splitting data with seed 7
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=2200, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=2200, out_features=2200, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=2200, out_features=1, bias=True)
  )
)
Number of parameters = 9,352,201
Moving model to cuda
Epoch 0
Loss = 8.9753e-01, PNorm = 65.7657, GNorm = 1.8268, lr_0 = 1.0413e-04
Loss = 7.0645e-01, PNorm = 65.7793, GNorm = 2.0804, lr_0 = 1.0788e-04
Loss = 5.1027e-01, PNorm = 65.7929, GNorm = 2.8916, lr_0 = 1.1163e-04
Loss = 4.8158e-01, PNorm = 65.8039, GNorm = 2.3281, lr_0 = 1.1537e-04
Loss = 4.8800e-01, PNorm = 65.8139, GNorm = 2.6059, lr_0 = 1.1913e-04
Loss = 4.1898e-01, PNorm = 65.8235, GNorm = 2.3577, lr_0 = 1.2287e-04
Loss = 4.1481e-01, PNorm = 65.8333, GNorm = 2.2835, lr_0 = 1.2663e-04
Loss = 3.9192e-01, PNorm = 65.8433, GNorm = 1.4828, lr_0 = 1.3038e-04
Loss = 4.2988e-01, PNorm = 65.8535, GNorm = 4.3398, lr_0 = 1.3413e-04
Loss = 4.3459e-01, PNorm = 65.8622, GNorm = 2.7083, lr_0 = 1.3788e-04
Loss = 4.2527e-01, PNorm = 65.8723, GNorm = 4.5345, lr_0 = 1.4163e-04
Loss = 3.8688e-01, PNorm = 65.8831, GNorm = 1.9704, lr_0 = 1.4537e-04
Loss = 3.3768e-01, PNorm = 65.8929, GNorm = 2.6045, lr_0 = 1.4913e-04
Loss = 3.9839e-01, PNorm = 65.9047, GNorm = 1.8254, lr_0 = 1.5288e-04
Loss = 3.7719e-01, PNorm = 65.9154, GNorm = 2.7465, lr_0 = 1.5662e-04
Loss = 3.8737e-01, PNorm = 65.9258, GNorm = 2.3410, lr_0 = 1.6038e-04
Loss = 3.2856e-01, PNorm = 65.9388, GNorm = 1.9518, lr_0 = 1.6412e-04
Loss = 3.4677e-01, PNorm = 65.9516, GNorm = 1.8365, lr_0 = 1.6788e-04
Loss = 3.7940e-01, PNorm = 65.9626, GNorm = 1.9522, lr_0 = 1.7163e-04
Loss = 3.5861e-01, PNorm = 65.9756, GNorm = 2.5067, lr_0 = 1.7538e-04
Loss = 3.4484e-01, PNorm = 65.9887, GNorm = 1.7173, lr_0 = 1.7913e-04
Loss = 3.7473e-01, PNorm = 66.0019, GNorm = 4.4389, lr_0 = 1.8288e-04
Loss = 3.3624e-01, PNorm = 66.0181, GNorm = 1.6145, lr_0 = 1.8662e-04
Loss = 2.8729e-01, PNorm = 66.0335, GNorm = 1.4085, lr_0 = 1.9038e-04
Loss = 2.9150e-01, PNorm = 66.0447, GNorm = 2.0042, lr_0 = 1.9413e-04
Loss = 3.0974e-01, PNorm = 66.0580, GNorm = 2.3219, lr_0 = 1.9788e-04
Loss = 3.1908e-01, PNorm = 66.0735, GNorm = 1.7113, lr_0 = 2.0163e-04
Loss = 2.9558e-01, PNorm = 66.0878, GNorm = 1.7284, lr_0 = 2.0537e-04
Loss = 3.0458e-01, PNorm = 66.1039, GNorm = 1.3104, lr_0 = 2.0913e-04
Loss = 3.0974e-01, PNorm = 66.1217, GNorm = 1.3959, lr_0 = 2.1288e-04
Loss = 3.4215e-01, PNorm = 66.1382, GNorm = 1.6223, lr_0 = 2.1663e-04
Loss = 3.2175e-01, PNorm = 66.1560, GNorm = 1.2465, lr_0 = 2.2038e-04
Loss = 3.2202e-01, PNorm = 66.1724, GNorm = 1.5352, lr_0 = 2.2412e-04
Loss = 2.8751e-01, PNorm = 66.1871, GNorm = 2.0995, lr_0 = 2.2787e-04
Loss = 3.1335e-01, PNorm = 66.2039, GNorm = 1.5200, lr_0 = 2.3163e-04
Loss = 2.5832e-01, PNorm = 66.2225, GNorm = 1.8308, lr_0 = 2.3538e-04
Loss = 3.2001e-01, PNorm = 66.2369, GNorm = 2.8096, lr_0 = 2.3913e-04
Loss = 3.4792e-01, PNorm = 66.2552, GNorm = 1.2639, lr_0 = 2.4288e-04
Loss = 3.9380e-01, PNorm = 66.2788, GNorm = 1.6358, lr_0 = 2.4662e-04
Loss = 2.9262e-01, PNorm = 66.3013, GNorm = 1.6203, lr_0 = 2.5038e-04
Loss = 2.7522e-01, PNorm = 66.3239, GNorm = 1.2827, lr_0 = 2.5413e-04
Loss = 2.6404e-01, PNorm = 66.3428, GNorm = 1.4618, lr_0 = 2.5788e-04
Loss = 3.2372e-01, PNorm = 66.3625, GNorm = 1.7890, lr_0 = 2.6163e-04
Loss = 3.2481e-01, PNorm = 66.3840, GNorm = 1.2128, lr_0 = 2.6537e-04
Loss = 2.9110e-01, PNorm = 66.4089, GNorm = 1.6365, lr_0 = 2.6912e-04
Loss = 3.2792e-01, PNorm = 66.4343, GNorm = 1.8481, lr_0 = 2.7288e-04
Loss = 3.1855e-01, PNorm = 66.4601, GNorm = 1.6160, lr_0 = 2.7663e-04
Loss = 3.0201e-01, PNorm = 66.4881, GNorm = 1.5614, lr_0 = 2.8038e-04
Loss = 3.5065e-01, PNorm = 66.5148, GNorm = 2.7681, lr_0 = 2.8413e-04
Loss = 3.6601e-01, PNorm = 66.5421, GNorm = 2.3289, lr_0 = 2.8787e-04
Loss = 3.6221e-01, PNorm = 66.5753, GNorm = 1.5109, lr_0 = 2.9163e-04
Loss = 3.4300e-01, PNorm = 66.6059, GNorm = 2.0259, lr_0 = 2.9538e-04
Loss = 3.1443e-01, PNorm = 66.6345, GNorm = 1.0893, lr_0 = 2.9913e-04
Loss = 3.0594e-01, PNorm = 66.6614, GNorm = 1.2444, lr_0 = 3.0288e-04
Loss = 2.9571e-01, PNorm = 66.6913, GNorm = 2.0387, lr_0 = 3.0662e-04
Loss = 2.4517e-01, PNorm = 66.7195, GNorm = 0.9817, lr_0 = 3.1037e-04
Loss = 3.1109e-01, PNorm = 66.7489, GNorm = 1.8391, lr_0 = 3.1413e-04
Loss = 3.2049e-01, PNorm = 66.7775, GNorm = 1.3630, lr_0 = 3.1788e-04
Loss = 2.5079e-01, PNorm = 66.8091, GNorm = 1.2009, lr_0 = 3.2163e-04
Loss = 2.7374e-01, PNorm = 66.8337, GNorm = 1.3937, lr_0 = 3.2538e-04
Loss = 2.4663e-01, PNorm = 66.8612, GNorm = 1.3228, lr_0 = 3.2912e-04
Loss = 2.7189e-01, PNorm = 66.8833, GNorm = 1.1051, lr_0 = 3.3288e-04
Loss = 2.7067e-01, PNorm = 66.9145, GNorm = 1.1570, lr_0 = 3.3663e-04
Loss = 2.6551e-01, PNorm = 66.9429, GNorm = 1.3412, lr_0 = 3.4038e-04
Loss = 2.8373e-01, PNorm = 66.9697, GNorm = 1.3176, lr_0 = 3.4413e-04
Loss = 2.8133e-01, PNorm = 67.0042, GNorm = 1.0960, lr_0 = 3.4787e-04
Loss = 2.8639e-01, PNorm = 67.0337, GNorm = 1.5312, lr_0 = 3.5162e-04
Loss = 2.9026e-01, PNorm = 67.0665, GNorm = 1.3247, lr_0 = 3.5538e-04
Loss = 2.8018e-01, PNorm = 67.1040, GNorm = 1.2007, lr_0 = 3.5913e-04
Loss = 2.7703e-01, PNorm = 67.1364, GNorm = 1.3197, lr_0 = 3.6288e-04
Loss = 2.7883e-01, PNorm = 67.1742, GNorm = 1.0874, lr_0 = 3.6662e-04
Loss = 2.3270e-01, PNorm = 67.2078, GNorm = 1.0047, lr_0 = 3.7037e-04
Loss = 2.7638e-01, PNorm = 67.2430, GNorm = 0.8953, lr_0 = 3.7413e-04
Loss = 3.0683e-01, PNorm = 67.2781, GNorm = 1.2206, lr_0 = 3.7788e-04
Loss = 2.8853e-01, PNorm = 67.3224, GNorm = 1.1860, lr_0 = 3.8163e-04
Loss = 3.1935e-01, PNorm = 67.3578, GNorm = 0.9482, lr_0 = 3.8537e-04
Loss = 2.9670e-01, PNorm = 67.4012, GNorm = 0.9265, lr_0 = 3.8912e-04
Loss = 2.4677e-01, PNorm = 67.4477, GNorm = 1.3289, lr_0 = 3.9287e-04
Loss = 2.7195e-01, PNorm = 67.4778, GNorm = 1.1439, lr_0 = 3.9663e-04
Loss = 2.6258e-01, PNorm = 67.5234, GNorm = 1.2882, lr_0 = 4.0038e-04
Loss = 2.9896e-01, PNorm = 67.5621, GNorm = 1.0607, lr_0 = 4.0413e-04
Loss = 2.6527e-01, PNorm = 67.6022, GNorm = 1.0451, lr_0 = 4.0787e-04
Loss = 2.5846e-01, PNorm = 67.6481, GNorm = 1.0286, lr_0 = 4.1162e-04
Loss = 2.8325e-01, PNorm = 67.6834, GNorm = 1.0121, lr_0 = 4.1537e-04
Loss = 2.7856e-01, PNorm = 67.7262, GNorm = 0.8024, lr_0 = 4.1913e-04
Loss = 2.3899e-01, PNorm = 67.7588, GNorm = 1.0814, lr_0 = 4.2288e-04
Loss = 2.7281e-01, PNorm = 67.7937, GNorm = 0.9758, lr_0 = 4.2662e-04
Loss = 2.8083e-01, PNorm = 67.8301, GNorm = 1.2431, lr_0 = 4.3037e-04
Loss = 2.6434e-01, PNorm = 67.8695, GNorm = 1.0033, lr_0 = 4.3412e-04
Loss = 2.5909e-01, PNorm = 67.9158, GNorm = 1.0865, lr_0 = 4.3788e-04
Loss = 2.4036e-01, PNorm = 67.9575, GNorm = 0.7141, lr_0 = 4.4163e-04
Loss = 2.7572e-01, PNorm = 68.0020, GNorm = 1.0960, lr_0 = 4.4538e-04
Loss = 2.8087e-01, PNorm = 68.0478, GNorm = 1.0025, lr_0 = 4.4912e-04
Loss = 2.4503e-01, PNorm = 68.0938, GNorm = 1.0536, lr_0 = 4.5287e-04
Loss = 2.5675e-01, PNorm = 68.1371, GNorm = 1.1933, lr_0 = 4.5662e-04
Loss = 2.5714e-01, PNorm = 68.1900, GNorm = 0.9164, lr_0 = 4.6038e-04
Loss = 2.4438e-01, PNorm = 68.2377, GNorm = 0.9878, lr_0 = 4.6413e-04
Loss = 2.6083e-01, PNorm = 68.2882, GNorm = 1.1450, lr_0 = 4.6787e-04
Loss = 2.4138e-01, PNorm = 68.3437, GNorm = 0.8988, lr_0 = 4.7162e-04
Loss = 2.7138e-01, PNorm = 68.3936, GNorm = 1.0284, lr_0 = 4.7537e-04
Loss = 2.9306e-01, PNorm = 68.4489, GNorm = 1.3950, lr_0 = 4.7913e-04
Loss = 2.3636e-01, PNorm = 68.4983, GNorm = 1.1170, lr_0 = 4.8288e-04
Loss = 2.4894e-01, PNorm = 68.5506, GNorm = 0.9506, lr_0 = 4.8663e-04
Loss = 2.4220e-01, PNorm = 68.6025, GNorm = 1.1411, lr_0 = 4.9038e-04
Loss = 2.5683e-01, PNorm = 68.6512, GNorm = 0.7653, lr_0 = 4.9412e-04
Loss = 2.5445e-01, PNorm = 68.7039, GNorm = 1.2677, lr_0 = 4.9788e-04
Loss = 2.5617e-01, PNorm = 68.7504, GNorm = 0.6543, lr_0 = 5.0163e-04
Loss = 2.2925e-01, PNorm = 68.8052, GNorm = 0.8074, lr_0 = 5.0538e-04
Loss = 2.7364e-01, PNorm = 68.8593, GNorm = 1.0552, lr_0 = 5.0913e-04
Loss = 2.8663e-01, PNorm = 68.9132, GNorm = 1.0476, lr_0 = 5.1287e-04
Loss = 2.6646e-01, PNorm = 68.9818, GNorm = 1.5154, lr_0 = 5.1663e-04
Loss = 2.2866e-01, PNorm = 69.0399, GNorm = 0.8640, lr_0 = 5.2038e-04
Loss = 2.7485e-01, PNorm = 69.0917, GNorm = 1.3042, lr_0 = 5.2413e-04
Loss = 2.3569e-01, PNorm = 69.1541, GNorm = 0.9667, lr_0 = 5.2788e-04
Loss = 2.6442e-01, PNorm = 69.2105, GNorm = 1.0616, lr_0 = 5.3162e-04
Loss = 2.3140e-01, PNorm = 69.2766, GNorm = 0.8731, lr_0 = 5.3538e-04
Loss = 2.7713e-01, PNorm = 69.3251, GNorm = 1.2450, lr_0 = 5.3912e-04
Loss = 2.8135e-01, PNorm = 69.3860, GNorm = 1.0203, lr_0 = 5.4288e-04
Loss = 2.4333e-01, PNorm = 69.4498, GNorm = 1.4507, lr_0 = 5.4663e-04
Loss = 2.3378e-01, PNorm = 69.5056, GNorm = 0.9342, lr_0 = 5.5038e-04
Validation mae = 0.315217
Epoch 1
Loss = 1.5604e-01, PNorm = 69.5639, GNorm = 0.8564, lr_0 = 5.5413e-04
Loss = 1.7772e-01, PNorm = 69.6059, GNorm = 0.7584, lr_0 = 5.5787e-04
Loss = 1.6834e-01, PNorm = 69.6641, GNorm = 0.7991, lr_0 = 5.6163e-04
Loss = 1.6377e-01, PNorm = 69.7149, GNorm = 0.6579, lr_0 = 5.6538e-04
Loss = 1.7420e-01, PNorm = 69.7718, GNorm = 0.9417, lr_0 = 5.6913e-04
Loss = 1.8833e-01, PNorm = 69.8263, GNorm = 1.0484, lr_0 = 5.7288e-04
Loss = 1.6918e-01, PNorm = 69.8923, GNorm = 0.8103, lr_0 = 5.7662e-04
Loss = 1.3723e-01, PNorm = 69.9538, GNorm = 0.5988, lr_0 = 5.8038e-04
Loss = 1.5936e-01, PNorm = 70.0074, GNorm = 0.9053, lr_0 = 5.8413e-04
Loss = 1.8689e-01, PNorm = 70.0656, GNorm = 0.8503, lr_0 = 5.8788e-04
Loss = 1.7569e-01, PNorm = 70.1305, GNorm = 0.8664, lr_0 = 5.9163e-04
Loss = 1.8609e-01, PNorm = 70.2029, GNorm = 0.7298, lr_0 = 5.9538e-04
Loss = 1.9896e-01, PNorm = 70.2729, GNorm = 0.7458, lr_0 = 5.9913e-04
Loss = 1.7857e-01, PNorm = 70.3509, GNorm = 0.9450, lr_0 = 6.0288e-04
Loss = 1.9171e-01, PNorm = 70.4301, GNorm = 0.7466, lr_0 = 6.0663e-04
Loss = 2.0145e-01, PNorm = 70.5112, GNorm = 0.9071, lr_0 = 6.1038e-04
Loss = 1.7822e-01, PNorm = 70.5873, GNorm = 0.9530, lr_0 = 6.1413e-04
Loss = 1.8936e-01, PNorm = 70.6739, GNorm = 0.9197, lr_0 = 6.1788e-04
Loss = 1.8607e-01, PNorm = 70.7490, GNorm = 0.8301, lr_0 = 6.2163e-04
Loss = 1.7378e-01, PNorm = 70.8294, GNorm = 0.7277, lr_0 = 6.2538e-04
Loss = 1.8022e-01, PNorm = 70.9096, GNorm = 1.1425, lr_0 = 6.2913e-04
Loss = 1.8944e-01, PNorm = 70.9954, GNorm = 0.8424, lr_0 = 6.3288e-04
Loss = 1.7216e-01, PNorm = 71.0794, GNorm = 0.8187, lr_0 = 6.3663e-04
Loss = 1.8497e-01, PNorm = 71.1601, GNorm = 0.8900, lr_0 = 6.4038e-04
Loss = 1.7277e-01, PNorm = 71.2405, GNorm = 0.8238, lr_0 = 6.4413e-04
Loss = 1.5677e-01, PNorm = 71.3209, GNorm = 0.7563, lr_0 = 6.4788e-04
Loss = 1.5722e-01, PNorm = 71.3989, GNorm = 0.8227, lr_0 = 6.5163e-04
Loss = 1.8829e-01, PNorm = 71.4778, GNorm = 0.8254, lr_0 = 6.5538e-04
Loss = 1.7502e-01, PNorm = 71.5692, GNorm = 0.8921, lr_0 = 6.5913e-04
Loss = 1.9576e-01, PNorm = 71.6519, GNorm = 1.0538, lr_0 = 6.6288e-04
Loss = 1.8094e-01, PNorm = 71.7527, GNorm = 1.0871, lr_0 = 6.6663e-04
Loss = 2.0753e-01, PNorm = 71.8414, GNorm = 1.1761, lr_0 = 6.7038e-04
Loss = 1.8054e-01, PNorm = 71.9617, GNorm = 1.2709, lr_0 = 6.7413e-04
Loss = 1.9441e-01, PNorm = 72.0459, GNorm = 0.9644, lr_0 = 6.7788e-04
Loss = 1.8961e-01, PNorm = 72.1473, GNorm = 1.0185, lr_0 = 6.8163e-04
Loss = 1.6671e-01, PNorm = 72.2510, GNorm = 0.7492, lr_0 = 6.8538e-04
Loss = 1.9191e-01, PNorm = 72.3459, GNorm = 0.9721, lr_0 = 6.8913e-04
Loss = 1.8133e-01, PNorm = 72.4486, GNorm = 0.8879, lr_0 = 6.9288e-04
Loss = 2.1256e-01, PNorm = 72.5599, GNorm = 1.0381, lr_0 = 6.9663e-04
Loss = 1.8400e-01, PNorm = 72.6768, GNorm = 0.7335, lr_0 = 7.0038e-04
Loss = 1.9152e-01, PNorm = 72.7811, GNorm = 0.6590, lr_0 = 7.0413e-04
Loss = 2.0344e-01, PNorm = 72.8923, GNorm = 0.8210, lr_0 = 7.0788e-04
Loss = 2.0024e-01, PNorm = 72.9899, GNorm = 0.6581, lr_0 = 7.1163e-04
Loss = 2.0978e-01, PNorm = 73.0976, GNorm = 1.0485, lr_0 = 7.1538e-04
Loss = 2.0321e-01, PNorm = 73.2050, GNorm = 1.0981, lr_0 = 7.1913e-04
Loss = 2.1222e-01, PNorm = 73.3141, GNorm = 0.8433, lr_0 = 7.2288e-04
Loss = 1.9329e-01, PNorm = 73.4234, GNorm = 0.8287, lr_0 = 7.2663e-04
Loss = 1.9243e-01, PNorm = 73.5360, GNorm = 1.1238, lr_0 = 7.3038e-04
Loss = 1.8092e-01, PNorm = 73.6396, GNorm = 0.9341, lr_0 = 7.3413e-04
Loss = 2.0941e-01, PNorm = 73.7430, GNorm = 1.1388, lr_0 = 7.3788e-04
Loss = 2.0679e-01, PNorm = 73.8675, GNorm = 0.8917, lr_0 = 7.4163e-04
Loss = 2.2089e-01, PNorm = 73.9899, GNorm = 0.8569, lr_0 = 7.4538e-04
Loss = 2.2099e-01, PNorm = 74.1136, GNorm = 1.4899, lr_0 = 7.4913e-04
Loss = 2.3426e-01, PNorm = 74.2263, GNorm = 0.6923, lr_0 = 7.5288e-04
Loss = 1.8910e-01, PNorm = 74.3473, GNorm = 0.9354, lr_0 = 7.5663e-04
Loss = 2.0918e-01, PNorm = 74.4389, GNorm = 0.8152, lr_0 = 7.6038e-04
Loss = 2.0677e-01, PNorm = 74.5571, GNorm = 0.9510, lr_0 = 7.6413e-04
Loss = 1.9555e-01, PNorm = 74.6673, GNorm = 0.9189, lr_0 = 7.6788e-04
Loss = 2.2174e-01, PNorm = 74.7822, GNorm = 0.7949, lr_0 = 7.7163e-04
Loss = 1.8073e-01, PNorm = 74.9001, GNorm = 0.7483, lr_0 = 7.7538e-04
Loss = 1.8559e-01, PNorm = 75.0135, GNorm = 0.9165, lr_0 = 7.7913e-04
Loss = 2.0211e-01, PNorm = 75.1255, GNorm = 0.7421, lr_0 = 7.8288e-04
Loss = 2.3339e-01, PNorm = 75.2512, GNorm = 1.2221, lr_0 = 7.8663e-04
Loss = 2.1196e-01, PNorm = 75.3729, GNorm = 0.8906, lr_0 = 7.9038e-04
Loss = 2.1008e-01, PNorm = 75.4989, GNorm = 0.8847, lr_0 = 7.9413e-04
Loss = 2.1325e-01, PNorm = 75.6227, GNorm = 0.9082, lr_0 = 7.9788e-04
Loss = 2.0006e-01, PNorm = 75.7552, GNorm = 1.1637, lr_0 = 8.0163e-04
Loss = 1.8929e-01, PNorm = 75.8658, GNorm = 0.6977, lr_0 = 8.0538e-04
Loss = 2.0497e-01, PNorm = 75.9865, GNorm = 0.7672, lr_0 = 8.0913e-04
Loss = 1.8938e-01, PNorm = 76.1006, GNorm = 0.8845, lr_0 = 8.1288e-04
Loss = 2.0549e-01, PNorm = 76.2263, GNorm = 0.8552, lr_0 = 8.1663e-04
Loss = 1.8670e-01, PNorm = 76.3561, GNorm = 1.1677, lr_0 = 8.2038e-04
Loss = 1.8858e-01, PNorm = 76.4798, GNorm = 0.8119, lr_0 = 8.2413e-04
Loss = 2.2164e-01, PNorm = 76.6022, GNorm = 1.5558, lr_0 = 8.2788e-04
Loss = 1.9709e-01, PNorm = 76.7303, GNorm = 1.0493, lr_0 = 8.3163e-04
Loss = 2.0141e-01, PNorm = 76.8392, GNorm = 1.0800, lr_0 = 8.3538e-04
Loss = 2.2213e-01, PNorm = 76.9688, GNorm = 0.8340, lr_0 = 8.3913e-04
Loss = 1.9950e-01, PNorm = 77.0911, GNorm = 1.0313, lr_0 = 8.4288e-04
Loss = 1.8518e-01, PNorm = 77.2080, GNorm = 0.5954, lr_0 = 8.4663e-04
Loss = 2.1761e-01, PNorm = 77.3387, GNorm = 0.8481, lr_0 = 8.5038e-04
Loss = 2.0634e-01, PNorm = 77.4649, GNorm = 0.5952, lr_0 = 8.5413e-04
Loss = 1.8199e-01, PNorm = 77.6059, GNorm = 0.6890, lr_0 = 8.5788e-04
Loss = 1.9184e-01, PNorm = 77.7224, GNorm = 0.6661, lr_0 = 8.6163e-04
Loss = 2.1977e-01, PNorm = 77.8723, GNorm = 1.1003, lr_0 = 8.6538e-04
Loss = 2.2137e-01, PNorm = 78.0095, GNorm = 1.0487, lr_0 = 8.6913e-04
Loss = 2.3333e-01, PNorm = 78.1555, GNorm = 0.9187, lr_0 = 8.7288e-04
Loss = 2.1497e-01, PNorm = 78.2954, GNorm = 0.5576, lr_0 = 8.7663e-04
Loss = 1.8658e-01, PNorm = 78.4407, GNorm = 1.0615, lr_0 = 8.8038e-04
Loss = 1.8310e-01, PNorm = 78.5678, GNorm = 0.7395, lr_0 = 8.8413e-04
Loss = 2.1877e-01, PNorm = 78.6959, GNorm = 0.7981, lr_0 = 8.8788e-04
Loss = 2.0589e-01, PNorm = 78.8312, GNorm = 1.0106, lr_0 = 8.9163e-04
Loss = 2.0035e-01, PNorm = 78.9675, GNorm = 0.7786, lr_0 = 8.9538e-04
Loss = 2.2540e-01, PNorm = 79.1073, GNorm = 0.8568, lr_0 = 8.9913e-04
Loss = 2.2883e-01, PNorm = 79.2508, GNorm = 0.7105, lr_0 = 9.0288e-04
Loss = 2.0388e-01, PNorm = 79.4070, GNorm = 0.6601, lr_0 = 9.0663e-04
Loss = 2.0643e-01, PNorm = 79.5455, GNorm = 1.0153, lr_0 = 9.1038e-04
Loss = 2.1407e-01, PNorm = 79.6907, GNorm = 0.6445, lr_0 = 9.1413e-04
Loss = 2.1027e-01, PNorm = 79.8230, GNorm = 1.6689, lr_0 = 9.1788e-04
Loss = 2.1784e-01, PNorm = 79.9630, GNorm = 1.7071, lr_0 = 9.2163e-04
Loss = 2.0915e-01, PNorm = 80.1043, GNorm = 0.8201, lr_0 = 9.2538e-04
Loss = 2.0352e-01, PNorm = 80.2452, GNorm = 0.7929, lr_0 = 9.2913e-04
Loss = 2.0629e-01, PNorm = 80.3813, GNorm = 0.8210, lr_0 = 9.3288e-04
Loss = 2.1429e-01, PNorm = 80.5312, GNorm = 1.1215, lr_0 = 9.3663e-04
Loss = 2.1266e-01, PNorm = 80.6787, GNorm = 0.6126, lr_0 = 9.4038e-04
Loss = 2.1154e-01, PNorm = 80.8195, GNorm = 0.8069, lr_0 = 9.4413e-04
Loss = 2.0677e-01, PNorm = 80.9669, GNorm = 0.9125, lr_0 = 9.4788e-04
Loss = 2.0568e-01, PNorm = 81.0989, GNorm = 0.9677, lr_0 = 9.5163e-04
Loss = 2.1715e-01, PNorm = 81.2454, GNorm = 0.8474, lr_0 = 9.5538e-04
Loss = 1.8086e-01, PNorm = 81.3873, GNorm = 0.6080, lr_0 = 9.5913e-04
Loss = 2.0019e-01, PNorm = 81.5248, GNorm = 0.5437, lr_0 = 9.6288e-04
Loss = 2.0675e-01, PNorm = 81.6655, GNorm = 0.9423, lr_0 = 9.6663e-04
Loss = 2.1176e-01, PNorm = 81.8210, GNorm = 0.5949, lr_0 = 9.7038e-04
Loss = 2.0832e-01, PNorm = 81.9874, GNorm = 1.6073, lr_0 = 9.7413e-04
Loss = 1.9015e-01, PNorm = 82.1281, GNorm = 1.1938, lr_0 = 9.7788e-04
Loss = 2.0356e-01, PNorm = 82.2844, GNorm = 0.7929, lr_0 = 9.8163e-04
Loss = 2.4218e-01, PNorm = 82.4401, GNorm = 0.7320, lr_0 = 9.8537e-04
Loss = 1.8630e-01, PNorm = 82.6049, GNorm = 0.6478, lr_0 = 9.8912e-04
Loss = 2.4381e-01, PNorm = 82.7548, GNorm = 0.8540, lr_0 = 9.9288e-04
Loss = 2.3412e-01, PNorm = 82.9150, GNorm = 0.9700, lr_0 = 9.9663e-04
Loss = 2.0901e-01, PNorm = 83.0732, GNorm = 0.6533, lr_0 = 9.9993e-04
Validation mae = 0.309207
Epoch 2
Loss = 1.3699e-01, PNorm = 83.2345, GNorm = 0.5948, lr_0 = 9.9925e-04
Loss = 1.3619e-01, PNorm = 83.3661, GNorm = 0.6303, lr_0 = 9.9856e-04
Loss = 1.1575e-01, PNorm = 83.4828, GNorm = 0.6251, lr_0 = 9.9788e-04
Loss = 1.3373e-01, PNorm = 83.5910, GNorm = 0.6455, lr_0 = 9.9719e-04
Loss = 1.3882e-01, PNorm = 83.7081, GNorm = 0.5285, lr_0 = 9.9651e-04
Loss = 1.5090e-01, PNorm = 83.8155, GNorm = 0.7917, lr_0 = 9.9583e-04
Loss = 1.2348e-01, PNorm = 83.9375, GNorm = 0.8911, lr_0 = 9.9515e-04
Loss = 1.1860e-01, PNorm = 84.0430, GNorm = 0.6280, lr_0 = 9.9446e-04
Loss = 1.1305e-01, PNorm = 84.1467, GNorm = 0.5891, lr_0 = 9.9378e-04
Loss = 1.1709e-01, PNorm = 84.2507, GNorm = 0.5646, lr_0 = 9.9310e-04
Loss = 1.0945e-01, PNorm = 84.3504, GNorm = 0.5268, lr_0 = 9.9242e-04
Loss = 1.1839e-01, PNorm = 84.4488, GNorm = 0.4461, lr_0 = 9.9174e-04
Loss = 1.2659e-01, PNorm = 84.5703, GNorm = 0.5415, lr_0 = 9.9106e-04
Loss = 1.2649e-01, PNorm = 84.6779, GNorm = 0.4712, lr_0 = 9.9038e-04
Loss = 1.3760e-01, PNorm = 84.8152, GNorm = 1.1364, lr_0 = 9.8971e-04
Loss = 1.3844e-01, PNorm = 84.9364, GNorm = 1.4599, lr_0 = 9.8903e-04
Loss = 1.3091e-01, PNorm = 85.0758, GNorm = 0.6583, lr_0 = 9.8835e-04
Loss = 1.4505e-01, PNorm = 85.2000, GNorm = 0.9573, lr_0 = 9.8767e-04
Loss = 1.2291e-01, PNorm = 85.3269, GNorm = 0.8039, lr_0 = 9.8700e-04
Loss = 1.1530e-01, PNorm = 85.4495, GNorm = 0.9810, lr_0 = 9.8632e-04
Loss = 1.4003e-01, PNorm = 85.5804, GNorm = 0.4040, lr_0 = 9.8564e-04
Loss = 1.2129e-01, PNorm = 85.6981, GNorm = 0.5333, lr_0 = 9.8497e-04
Loss = 1.2963e-01, PNorm = 85.8311, GNorm = 0.5760, lr_0 = 9.8429e-04
Loss = 1.1771e-01, PNorm = 85.9555, GNorm = 0.5251, lr_0 = 9.8362e-04
Loss = 1.3397e-01, PNorm = 86.0849, GNorm = 0.5662, lr_0 = 9.8295e-04
Loss = 1.3502e-01, PNorm = 86.2163, GNorm = 0.8589, lr_0 = 9.8227e-04
Loss = 1.2158e-01, PNorm = 86.3554, GNorm = 0.6043, lr_0 = 9.8160e-04
Loss = 1.4633e-01, PNorm = 86.4847, GNorm = 0.6773, lr_0 = 9.8093e-04
Loss = 1.3136e-01, PNorm = 86.6339, GNorm = 0.6935, lr_0 = 9.8026e-04
Loss = 1.2220e-01, PNorm = 86.7619, GNorm = 0.8244, lr_0 = 9.7958e-04
Loss = 1.4692e-01, PNorm = 86.8956, GNorm = 0.6962, lr_0 = 9.7891e-04
Loss = 1.2980e-01, PNorm = 87.0413, GNorm = 0.9715, lr_0 = 9.7824e-04
Loss = 1.4184e-01, PNorm = 87.1791, GNorm = 0.7335, lr_0 = 9.7757e-04
Loss = 1.3634e-01, PNorm = 87.3245, GNorm = 0.6500, lr_0 = 9.7690e-04
Loss = 1.3906e-01, PNorm = 87.4596, GNorm = 0.5375, lr_0 = 9.7623e-04
Loss = 1.2345e-01, PNorm = 87.5979, GNorm = 1.0678, lr_0 = 9.7556e-04
Loss = 1.4086e-01, PNorm = 87.7172, GNorm = 1.0970, lr_0 = 9.7490e-04
Loss = 1.4037e-01, PNorm = 87.8484, GNorm = 0.9510, lr_0 = 9.7423e-04
Loss = 1.2751e-01, PNorm = 87.9831, GNorm = 0.9339, lr_0 = 9.7356e-04
Loss = 1.2975e-01, PNorm = 88.1205, GNorm = 0.4797, lr_0 = 9.7289e-04
Loss = 1.2490e-01, PNorm = 88.2483, GNorm = 0.4889, lr_0 = 9.7223e-04
Loss = 1.3495e-01, PNorm = 88.3839, GNorm = 0.5798, lr_0 = 9.7156e-04
Loss = 1.3040e-01, PNorm = 88.5061, GNorm = 0.4934, lr_0 = 9.7090e-04
Loss = 1.2061e-01, PNorm = 88.6376, GNorm = 0.7598, lr_0 = 9.7023e-04
Loss = 1.3837e-01, PNorm = 88.7691, GNorm = 0.6899, lr_0 = 9.6957e-04
Loss = 1.3535e-01, PNorm = 88.8981, GNorm = 0.5932, lr_0 = 9.6890e-04
Loss = 1.4576e-01, PNorm = 89.0338, GNorm = 0.8139, lr_0 = 9.6824e-04
Loss = 1.3444e-01, PNorm = 89.1594, GNorm = 0.5890, lr_0 = 9.6757e-04
Loss = 1.3575e-01, PNorm = 89.2918, GNorm = 0.8243, lr_0 = 9.6691e-04
Loss = 1.4491e-01, PNorm = 89.4207, GNorm = 0.9915, lr_0 = 9.6625e-04
Loss = 1.3594e-01, PNorm = 89.5531, GNorm = 0.7610, lr_0 = 9.6559e-04
Loss = 1.2928e-01, PNorm = 89.6770, GNorm = 0.4790, lr_0 = 9.6493e-04
Loss = 1.3109e-01, PNorm = 89.8018, GNorm = 0.6003, lr_0 = 9.6427e-04
Loss = 1.3018e-01, PNorm = 89.9432, GNorm = 0.5179, lr_0 = 9.6360e-04
Loss = 1.3763e-01, PNorm = 90.0870, GNorm = 0.5778, lr_0 = 9.6294e-04
Loss = 1.3416e-01, PNorm = 90.2226, GNorm = 1.5536, lr_0 = 9.6228e-04
Loss = 1.4486e-01, PNorm = 90.3507, GNorm = 0.6587, lr_0 = 9.6163e-04
Loss = 1.3387e-01, PNorm = 90.4936, GNorm = 0.6511, lr_0 = 9.6097e-04
Loss = 1.4247e-01, PNorm = 90.6071, GNorm = 0.7340, lr_0 = 9.6031e-04
Loss = 1.5283e-01, PNorm = 90.7359, GNorm = 0.9403, lr_0 = 9.5965e-04
Loss = 1.3152e-01, PNorm = 90.8618, GNorm = 0.5701, lr_0 = 9.5899e-04
Loss = 1.3089e-01, PNorm = 90.9856, GNorm = 0.7372, lr_0 = 9.5834e-04
Loss = 1.4050e-01, PNorm = 91.0975, GNorm = 1.5717, lr_0 = 9.5768e-04
Loss = 1.4753e-01, PNorm = 91.2234, GNorm = 0.7622, lr_0 = 9.5702e-04
Loss = 1.4370e-01, PNorm = 91.3458, GNorm = 0.5464, lr_0 = 9.5637e-04
Loss = 1.3463e-01, PNorm = 91.4657, GNorm = 0.5465, lr_0 = 9.5571e-04
Loss = 1.5635e-01, PNorm = 91.5971, GNorm = 1.2464, lr_0 = 9.5506e-04
Loss = 1.3590e-01, PNorm = 91.7086, GNorm = 0.6684, lr_0 = 9.5440e-04
Loss = 1.2534e-01, PNorm = 91.8308, GNorm = 0.6477, lr_0 = 9.5375e-04
Loss = 1.5269e-01, PNorm = 91.9366, GNorm = 1.0802, lr_0 = 9.5310e-04
Loss = 1.4041e-01, PNorm = 92.0687, GNorm = 1.0970, lr_0 = 9.5244e-04
Loss = 1.2723e-01, PNorm = 92.1856, GNorm = 0.6617, lr_0 = 9.5179e-04
Loss = 1.3746e-01, PNorm = 92.2873, GNorm = 0.5391, lr_0 = 9.5114e-04
Loss = 1.6001e-01, PNorm = 92.4146, GNorm = 0.8380, lr_0 = 9.5049e-04
Loss = 1.2842e-01, PNorm = 92.5472, GNorm = 1.0249, lr_0 = 9.4984e-04
Loss = 1.3911e-01, PNorm = 92.6841, GNorm = 0.5672, lr_0 = 9.4919e-04
Loss = 1.2931e-01, PNorm = 92.8166, GNorm = 1.0141, lr_0 = 9.4854e-04
Loss = 1.5154e-01, PNorm = 92.9388, GNorm = 0.6658, lr_0 = 9.4789e-04
Loss = 1.4896e-01, PNorm = 93.0858, GNorm = 1.1949, lr_0 = 9.4724e-04
Loss = 1.3540e-01, PNorm = 93.2156, GNorm = 0.7606, lr_0 = 9.4659e-04
Loss = 1.3136e-01, PNorm = 93.3436, GNorm = 0.9751, lr_0 = 9.4594e-04
Loss = 1.3391e-01, PNorm = 93.4739, GNorm = 0.7267, lr_0 = 9.4529e-04
Loss = 1.4233e-01, PNorm = 93.5950, GNorm = 0.6276, lr_0 = 9.4464e-04
Loss = 1.3042e-01, PNorm = 93.7106, GNorm = 0.8158, lr_0 = 9.4400e-04
Loss = 1.6602e-01, PNorm = 93.8365, GNorm = 0.7190, lr_0 = 9.4335e-04
Loss = 1.2930e-01, PNorm = 93.9718, GNorm = 0.5852, lr_0 = 9.4270e-04
Loss = 1.4838e-01, PNorm = 94.0870, GNorm = 0.6749, lr_0 = 9.4206e-04
Loss = 1.3546e-01, PNorm = 94.2008, GNorm = 0.8560, lr_0 = 9.4141e-04
Loss = 1.4333e-01, PNorm = 94.3230, GNorm = 0.7188, lr_0 = 9.4077e-04
Loss = 1.2972e-01, PNorm = 94.4457, GNorm = 0.7881, lr_0 = 9.4012e-04
Loss = 1.5305e-01, PNorm = 94.5592, GNorm = 1.1829, lr_0 = 9.3948e-04
Loss = 1.4390e-01, PNorm = 94.6760, GNorm = 0.7402, lr_0 = 9.3884e-04
Loss = 1.5572e-01, PNorm = 94.8071, GNorm = 0.9094, lr_0 = 9.3819e-04
Loss = 1.5098e-01, PNorm = 94.9317, GNorm = 0.9267, lr_0 = 9.3755e-04
Loss = 1.6229e-01, PNorm = 95.0638, GNorm = 0.5452, lr_0 = 9.3691e-04
Loss = 1.4952e-01, PNorm = 95.1972, GNorm = 0.6521, lr_0 = 9.3627e-04
Loss = 1.5949e-01, PNorm = 95.3384, GNorm = 1.5711, lr_0 = 9.3562e-04
Loss = 1.5849e-01, PNorm = 95.4728, GNorm = 1.0720, lr_0 = 9.3498e-04
Loss = 1.4743e-01, PNorm = 95.6127, GNorm = 0.5258, lr_0 = 9.3434e-04
Loss = 1.4616e-01, PNorm = 95.7353, GNorm = 0.9894, lr_0 = 9.3370e-04
Loss = 1.5785e-01, PNorm = 95.8770, GNorm = 0.8020, lr_0 = 9.3306e-04
Loss = 1.3273e-01, PNorm = 96.0066, GNorm = 0.4496, lr_0 = 9.3242e-04
Loss = 1.5775e-01, PNorm = 96.1242, GNorm = 1.6060, lr_0 = 9.3178e-04
Loss = 1.4190e-01, PNorm = 96.2600, GNorm = 0.6284, lr_0 = 9.3115e-04
Loss = 1.3288e-01, PNorm = 96.3839, GNorm = 0.6984, lr_0 = 9.3051e-04
Loss = 1.4078e-01, PNorm = 96.5103, GNorm = 0.4939, lr_0 = 9.2987e-04
Loss = 1.4115e-01, PNorm = 96.6226, GNorm = 0.6478, lr_0 = 9.2923e-04
Loss = 1.4485e-01, PNorm = 96.7436, GNorm = 0.5369, lr_0 = 9.2860e-04
Loss = 1.4777e-01, PNorm = 96.8683, GNorm = 0.6800, lr_0 = 9.2796e-04
Loss = 1.3958e-01, PNorm = 97.0001, GNorm = 0.5556, lr_0 = 9.2733e-04
Loss = 1.4288e-01, PNorm = 97.1286, GNorm = 0.6514, lr_0 = 9.2669e-04
Loss = 1.4042e-01, PNorm = 97.2475, GNorm = 0.7708, lr_0 = 9.2606e-04
Loss = 1.3841e-01, PNorm = 97.3818, GNorm = 1.0202, lr_0 = 9.2542e-04
Loss = 1.5021e-01, PNorm = 97.5019, GNorm = 0.7069, lr_0 = 9.2479e-04
Loss = 1.5016e-01, PNorm = 97.6281, GNorm = 0.6767, lr_0 = 9.2415e-04
Loss = 1.4185e-01, PNorm = 97.7522, GNorm = 0.7342, lr_0 = 9.2352e-04
Loss = 1.3389e-01, PNorm = 97.8800, GNorm = 0.5056, lr_0 = 9.2289e-04
Loss = 1.4059e-01, PNorm = 97.9904, GNorm = 0.6174, lr_0 = 9.2226e-04
Loss = 1.3475e-01, PNorm = 98.1203, GNorm = 0.8401, lr_0 = 9.2162e-04
Loss = 1.4383e-01, PNorm = 98.2400, GNorm = 0.6550, lr_0 = 9.2099e-04
Validation mae = 0.298762
Epoch 3
Loss = 8.7043e-02, PNorm = 98.3529, GNorm = 0.4992, lr_0 = 9.2036e-04
Loss = 8.7540e-02, PNorm = 98.4561, GNorm = 0.4193, lr_0 = 9.1973e-04
Loss = 8.5816e-02, PNorm = 98.5358, GNorm = 0.5198, lr_0 = 9.1910e-04
Loss = 9.8197e-02, PNorm = 98.6289, GNorm = 0.5519, lr_0 = 9.1847e-04
Loss = 8.6361e-02, PNorm = 98.7232, GNorm = 0.3830, lr_0 = 9.1784e-04
Loss = 1.0171e-01, PNorm = 98.8196, GNorm = 0.4018, lr_0 = 9.1721e-04
Loss = 7.4393e-02, PNorm = 98.9123, GNorm = 0.6068, lr_0 = 9.1658e-04
Loss = 7.9617e-02, PNorm = 98.9961, GNorm = 0.7312, lr_0 = 9.1596e-04
Loss = 8.3073e-02, PNorm = 99.0855, GNorm = 0.4358, lr_0 = 9.1533e-04
Loss = 8.2606e-02, PNorm = 99.1648, GNorm = 0.6243, lr_0 = 9.1470e-04
Loss = 7.7215e-02, PNorm = 99.2553, GNorm = 0.3973, lr_0 = 9.1408e-04
Loss = 8.4474e-02, PNorm = 99.3321, GNorm = 0.7225, lr_0 = 9.1345e-04
Loss = 7.1841e-02, PNorm = 99.4087, GNorm = 0.3263, lr_0 = 9.1282e-04
Loss = 8.6031e-02, PNorm = 99.4755, GNorm = 0.7499, lr_0 = 9.1220e-04
Loss = 7.4727e-02, PNorm = 99.5505, GNorm = 0.3048, lr_0 = 9.1157e-04
Loss = 7.2044e-02, PNorm = 99.6281, GNorm = 0.6388, lr_0 = 9.1095e-04
Loss = 7.5863e-02, PNorm = 99.7047, GNorm = 0.6737, lr_0 = 9.1032e-04
Loss = 8.0855e-02, PNorm = 99.7814, GNorm = 0.5592, lr_0 = 9.0970e-04
Loss = 7.4708e-02, PNorm = 99.8620, GNorm = 1.0283, lr_0 = 9.0908e-04
Loss = 8.1122e-02, PNorm = 99.9428, GNorm = 0.8003, lr_0 = 9.0846e-04
Loss = 7.8954e-02, PNorm = 100.0217, GNorm = 0.6837, lr_0 = 9.0783e-04
Loss = 9.0858e-02, PNorm = 100.1048, GNorm = 0.8996, lr_0 = 9.0721e-04
Loss = 7.5770e-02, PNorm = 100.1778, GNorm = 0.7749, lr_0 = 9.0659e-04
Loss = 8.3235e-02, PNorm = 100.2710, GNorm = 0.5615, lr_0 = 9.0597e-04
Loss = 7.9343e-02, PNorm = 100.3568, GNorm = 0.5270, lr_0 = 9.0535e-04
Loss = 9.1175e-02, PNorm = 100.4487, GNorm = 0.5904, lr_0 = 9.0473e-04
Loss = 7.2367e-02, PNorm = 100.5266, GNorm = 0.6510, lr_0 = 9.0411e-04
Loss = 7.7749e-02, PNorm = 100.5966, GNorm = 0.4058, lr_0 = 9.0349e-04
Loss = 8.0369e-02, PNorm = 100.6786, GNorm = 0.3335, lr_0 = 9.0287e-04
Loss = 7.5075e-02, PNorm = 100.7497, GNorm = 0.3419, lr_0 = 9.0225e-04
Loss = 8.2382e-02, PNorm = 100.8236, GNorm = 0.4677, lr_0 = 9.0163e-04
Loss = 9.6042e-02, PNorm = 100.9042, GNorm = 0.7066, lr_0 = 9.0102e-04
Loss = 9.6551e-02, PNorm = 100.9953, GNorm = 0.8242, lr_0 = 9.0040e-04
Loss = 8.2978e-02, PNorm = 101.0805, GNorm = 0.4875, lr_0 = 8.9978e-04
Loss = 8.4453e-02, PNorm = 101.1637, GNorm = 0.5736, lr_0 = 8.9916e-04
Loss = 9.1667e-02, PNorm = 101.2568, GNorm = 0.6316, lr_0 = 8.9855e-04
Loss = 8.3095e-02, PNorm = 101.3483, GNorm = 0.3623, lr_0 = 8.9793e-04
Loss = 7.5744e-02, PNorm = 101.4368, GNorm = 0.5522, lr_0 = 8.9732e-04
Loss = 8.5028e-02, PNorm = 101.5279, GNorm = 0.6078, lr_0 = 8.9670e-04
Loss = 8.6118e-02, PNorm = 101.6223, GNorm = 0.9169, lr_0 = 8.9609e-04
Loss = 9.0081e-02, PNorm = 101.7111, GNorm = 1.2741, lr_0 = 8.9548e-04
Loss = 7.6034e-02, PNorm = 101.7992, GNorm = 0.5632, lr_0 = 8.9486e-04
Loss = 9.8688e-02, PNorm = 101.8856, GNorm = 0.6604, lr_0 = 8.9425e-04
Loss = 7.6194e-02, PNorm = 101.9814, GNorm = 0.5213, lr_0 = 8.9364e-04
Loss = 8.0600e-02, PNorm = 102.0671, GNorm = 0.5208, lr_0 = 8.9302e-04
Loss = 7.6114e-02, PNorm = 102.1532, GNorm = 0.4374, lr_0 = 8.9241e-04
Loss = 8.0916e-02, PNorm = 102.2389, GNorm = 0.4234, lr_0 = 8.9180e-04
Loss = 8.3571e-02, PNorm = 102.3365, GNorm = 0.5554, lr_0 = 8.9119e-04
Loss = 8.2791e-02, PNorm = 102.4246, GNorm = 0.4370, lr_0 = 8.9058e-04
Loss = 9.6467e-02, PNorm = 102.5266, GNorm = 0.5221, lr_0 = 8.8997e-04
Loss = 8.1183e-02, PNorm = 102.6219, GNorm = 0.5667, lr_0 = 8.8936e-04
Loss = 7.9181e-02, PNorm = 102.7208, GNorm = 0.8823, lr_0 = 8.8875e-04
Loss = 9.1859e-02, PNorm = 102.8158, GNorm = 0.6396, lr_0 = 8.8814e-04
Loss = 8.5177e-02, PNorm = 102.9301, GNorm = 0.4967, lr_0 = 8.8753e-04
Loss = 8.0585e-02, PNorm = 103.0239, GNorm = 0.4609, lr_0 = 8.8693e-04
Loss = 8.7314e-02, PNorm = 103.1308, GNorm = 0.8811, lr_0 = 8.8632e-04
Loss = 8.5637e-02, PNorm = 103.2185, GNorm = 0.4936, lr_0 = 8.8571e-04
Loss = 8.7776e-02, PNorm = 103.3228, GNorm = 0.7112, lr_0 = 8.8510e-04
Loss = 9.1749e-02, PNorm = 103.4188, GNorm = 0.7849, lr_0 = 8.8450e-04
Loss = 8.9947e-02, PNorm = 103.5243, GNorm = 0.7679, lr_0 = 8.8389e-04
Loss = 9.1490e-02, PNorm = 103.6247, GNorm = 0.7587, lr_0 = 8.8329e-04
Loss = 8.6016e-02, PNorm = 103.7315, GNorm = 0.6059, lr_0 = 8.8268e-04
Loss = 8.6054e-02, PNorm = 103.8322, GNorm = 0.9040, lr_0 = 8.8208e-04
Loss = 8.3627e-02, PNorm = 103.9384, GNorm = 0.4292, lr_0 = 8.8147e-04
Loss = 9.1320e-02, PNorm = 104.0457, GNorm = 0.7402, lr_0 = 8.8087e-04
Loss = 8.8546e-02, PNorm = 104.1485, GNorm = 1.4477, lr_0 = 8.8026e-04
Loss = 8.3219e-02, PNorm = 104.2622, GNorm = 0.5060, lr_0 = 8.7966e-04
Loss = 1.1151e-01, PNorm = 104.3616, GNorm = 0.7197, lr_0 = 8.7906e-04
Loss = 8.6413e-02, PNorm = 104.4723, GNorm = 0.7786, lr_0 = 8.7846e-04
Loss = 8.1413e-02, PNorm = 104.5855, GNorm = 0.7315, lr_0 = 8.7785e-04
Loss = 9.9285e-02, PNorm = 104.6838, GNorm = 0.4058, lr_0 = 8.7725e-04
Loss = 8.2957e-02, PNorm = 104.7890, GNorm = 0.8834, lr_0 = 8.7665e-04
Loss = 1.0173e-01, PNorm = 104.9000, GNorm = 0.7181, lr_0 = 8.7605e-04
Loss = 9.0087e-02, PNorm = 105.0029, GNorm = 0.8512, lr_0 = 8.7545e-04
Loss = 7.9579e-02, PNorm = 105.1156, GNorm = 0.3811, lr_0 = 8.7485e-04
Loss = 1.0384e-01, PNorm = 105.2116, GNorm = 0.5076, lr_0 = 8.7425e-04
Loss = 8.1257e-02, PNorm = 105.3096, GNorm = 0.5663, lr_0 = 8.7365e-04
Loss = 9.1407e-02, PNorm = 105.4109, GNorm = 0.4813, lr_0 = 8.7306e-04
Loss = 7.4213e-02, PNorm = 105.5188, GNorm = 0.3045, lr_0 = 8.7246e-04
Loss = 7.8279e-02, PNorm = 105.6082, GNorm = 0.4262, lr_0 = 8.7186e-04
Loss = 8.8006e-02, PNorm = 105.7104, GNorm = 0.7549, lr_0 = 8.7126e-04
Loss = 9.0685e-02, PNorm = 105.8062, GNorm = 0.8583, lr_0 = 8.7067e-04
Loss = 9.6954e-02, PNorm = 105.9113, GNorm = 0.6036, lr_0 = 8.7007e-04
Loss = 8.4661e-02, PNorm = 106.0055, GNorm = 0.6579, lr_0 = 8.6947e-04
Loss = 9.4522e-02, PNorm = 106.1077, GNorm = 0.4664, lr_0 = 8.6888e-04
Loss = 9.3015e-02, PNorm = 106.2160, GNorm = 0.4450, lr_0 = 8.6828e-04
Loss = 9.7305e-02, PNorm = 106.3174, GNorm = 0.6275, lr_0 = 8.6769e-04
Loss = 1.0195e-01, PNorm = 106.4210, GNorm = 0.5980, lr_0 = 8.6709e-04
Loss = 1.0508e-01, PNorm = 106.5356, GNorm = 0.6518, lr_0 = 8.6650e-04
Loss = 8.9991e-02, PNorm = 106.6493, GNorm = 0.7518, lr_0 = 8.6590e-04
Loss = 7.6137e-02, PNorm = 106.7550, GNorm = 0.6561, lr_0 = 8.6531e-04
Loss = 9.0551e-02, PNorm = 106.8612, GNorm = 1.3601, lr_0 = 8.6472e-04
Loss = 9.9641e-02, PNorm = 106.9598, GNorm = 1.0505, lr_0 = 8.6413e-04
Loss = 1.0008e-01, PNorm = 107.0666, GNorm = 0.5490, lr_0 = 8.6353e-04
Loss = 8.7170e-02, PNorm = 107.1688, GNorm = 0.9781, lr_0 = 8.6294e-04
Loss = 8.6757e-02, PNorm = 107.2716, GNorm = 0.8417, lr_0 = 8.6235e-04
Loss = 1.1087e-01, PNorm = 107.3703, GNorm = 0.4356, lr_0 = 8.6176e-04
Loss = 8.7128e-02, PNorm = 107.4785, GNorm = 0.7572, lr_0 = 8.6117e-04
Loss = 8.6746e-02, PNorm = 107.5775, GNorm = 0.5829, lr_0 = 8.6058e-04
Loss = 8.3269e-02, PNorm = 107.6826, GNorm = 0.4712, lr_0 = 8.5999e-04
Loss = 9.4682e-02, PNorm = 107.7850, GNorm = 0.9084, lr_0 = 8.5940e-04
Loss = 8.9136e-02, PNorm = 107.8859, GNorm = 0.9388, lr_0 = 8.5881e-04
Loss = 8.7525e-02, PNorm = 107.9920, GNorm = 0.4755, lr_0 = 8.5823e-04
Loss = 9.1010e-02, PNorm = 108.0929, GNorm = 0.4912, lr_0 = 8.5764e-04
Loss = 9.7403e-02, PNorm = 108.2163, GNorm = 0.6503, lr_0 = 8.5705e-04
Loss = 8.8296e-02, PNorm = 108.3240, GNorm = 0.5252, lr_0 = 8.5646e-04
Loss = 9.4352e-02, PNorm = 108.4378, GNorm = 1.6263, lr_0 = 8.5588e-04
Loss = 1.0537e-01, PNorm = 108.5315, GNorm = 0.5885, lr_0 = 8.5529e-04
Loss = 9.3785e-02, PNorm = 108.6536, GNorm = 0.5403, lr_0 = 8.5470e-04
Loss = 9.6482e-02, PNorm = 108.7610, GNorm = 0.4294, lr_0 = 8.5412e-04
Loss = 9.9915e-02, PNorm = 108.8670, GNorm = 0.7067, lr_0 = 8.5353e-04
Loss = 1.0324e-01, PNorm = 108.9822, GNorm = 1.1366, lr_0 = 8.5295e-04
Loss = 9.2562e-02, PNorm = 109.1090, GNorm = 1.8964, lr_0 = 8.5236e-04
Loss = 1.1042e-01, PNorm = 109.2203, GNorm = 0.5873, lr_0 = 8.5178e-04
Loss = 9.4350e-02, PNorm = 109.3386, GNorm = 0.8141, lr_0 = 8.5120e-04
Loss = 9.6753e-02, PNorm = 109.4482, GNorm = 0.8296, lr_0 = 8.5061e-04
Loss = 1.1089e-01, PNorm = 109.5606, GNorm = 0.8347, lr_0 = 8.5003e-04
Loss = 1.0303e-01, PNorm = 109.6766, GNorm = 0.7362, lr_0 = 8.4945e-04
Loss = 9.7307e-02, PNorm = 109.7905, GNorm = 0.9046, lr_0 = 8.4887e-04
Loss = 9.7826e-02, PNorm = 109.9116, GNorm = 0.8284, lr_0 = 8.4828e-04
Validation mae = 0.290787
Epoch 4
Loss = 5.8339e-02, PNorm = 110.0081, GNorm = 0.7295, lr_0 = 8.4770e-04
Loss = 6.8541e-02, PNorm = 110.0927, GNorm = 0.5946, lr_0 = 8.4712e-04
Loss = 5.6228e-02, PNorm = 110.1678, GNorm = 0.9018, lr_0 = 8.4654e-04
Loss = 6.0178e-02, PNorm = 110.2327, GNorm = 0.6034, lr_0 = 8.4596e-04
Loss = 5.1321e-02, PNorm = 110.3034, GNorm = 0.4599, lr_0 = 8.4538e-04
Loss = 5.0138e-02, PNorm = 110.3591, GNorm = 0.5874, lr_0 = 8.4480e-04
Loss = 5.1639e-02, PNorm = 110.4208, GNorm = 0.3054, lr_0 = 8.4423e-04
Loss = 5.8267e-02, PNorm = 110.4794, GNorm = 0.5477, lr_0 = 8.4365e-04
Loss = 5.5941e-02, PNorm = 110.5474, GNorm = 0.4215, lr_0 = 8.4307e-04
Loss = 4.8121e-02, PNorm = 110.6152, GNorm = 0.4386, lr_0 = 8.4249e-04
Loss = 5.7498e-02, PNorm = 110.6810, GNorm = 0.5096, lr_0 = 8.4191e-04
Loss = 5.5586e-02, PNorm = 110.7439, GNorm = 0.5039, lr_0 = 8.4134e-04
Loss = 5.7225e-02, PNorm = 110.8165, GNorm = 0.8291, lr_0 = 8.4076e-04
Loss = 5.5636e-02, PNorm = 110.8820, GNorm = 0.3221, lr_0 = 8.4019e-04
Loss = 5.1843e-02, PNorm = 110.9467, GNorm = 0.2466, lr_0 = 8.3961e-04
Loss = 5.7433e-02, PNorm = 111.0075, GNorm = 0.4430, lr_0 = 8.3903e-04
Loss = 4.9660e-02, PNorm = 111.0764, GNorm = 0.6453, lr_0 = 8.3846e-04
Loss = 5.5566e-02, PNorm = 111.1451, GNorm = 0.5947, lr_0 = 8.3789e-04
Loss = 5.5663e-02, PNorm = 111.2108, GNorm = 0.6593, lr_0 = 8.3731e-04
Loss = 5.6078e-02, PNorm = 111.2753, GNorm = 0.5089, lr_0 = 8.3674e-04
Loss = 5.4667e-02, PNorm = 111.3329, GNorm = 0.3689, lr_0 = 8.3616e-04
Loss = 4.7476e-02, PNorm = 111.3992, GNorm = 0.2704, lr_0 = 8.3559e-04
Loss = 5.0034e-02, PNorm = 111.4649, GNorm = 0.4059, lr_0 = 8.3502e-04
Loss = 5.8603e-02, PNorm = 111.5360, GNorm = 0.5355, lr_0 = 8.3445e-04
Loss = 5.5756e-02, PNorm = 111.6108, GNorm = 0.7399, lr_0 = 8.3388e-04
Loss = 5.8939e-02, PNorm = 111.6844, GNorm = 0.3549, lr_0 = 8.3330e-04
Loss = 5.3733e-02, PNorm = 111.7598, GNorm = 0.7554, lr_0 = 8.3273e-04
Loss = 5.7328e-02, PNorm = 111.8258, GNorm = 1.1189, lr_0 = 8.3216e-04
Loss = 5.2600e-02, PNorm = 111.9019, GNorm = 0.2782, lr_0 = 8.3159e-04
Loss = 5.2843e-02, PNorm = 111.9796, GNorm = 0.6060, lr_0 = 8.3102e-04
Loss = 6.3398e-02, PNorm = 112.0393, GNorm = 0.6198, lr_0 = 8.3045e-04
Loss = 5.5141e-02, PNorm = 112.1114, GNorm = 0.5252, lr_0 = 8.2988e-04
Loss = 5.4479e-02, PNorm = 112.1768, GNorm = 0.6309, lr_0 = 8.2932e-04
Loss = 6.5370e-02, PNorm = 112.2587, GNorm = 0.3039, lr_0 = 8.2875e-04
Loss = 5.6212e-02, PNorm = 112.3328, GNorm = 0.3922, lr_0 = 8.2818e-04
Loss = 5.9062e-02, PNorm = 112.4158, GNorm = 0.6782, lr_0 = 8.2761e-04
Loss = 6.6123e-02, PNorm = 112.4946, GNorm = 0.6836, lr_0 = 8.2705e-04
Loss = 5.3607e-02, PNorm = 112.5717, GNorm = 0.7940, lr_0 = 8.2648e-04
Loss = 5.4930e-02, PNorm = 112.6510, GNorm = 0.2651, lr_0 = 8.2591e-04
Loss = 5.4868e-02, PNorm = 112.7247, GNorm = 0.4107, lr_0 = 8.2535e-04
Loss = 5.5891e-02, PNorm = 112.8000, GNorm = 0.4210, lr_0 = 8.2478e-04
Loss = 5.7534e-02, PNorm = 112.8828, GNorm = 0.4051, lr_0 = 8.2422e-04
Loss = 5.6430e-02, PNorm = 112.9538, GNorm = 0.5346, lr_0 = 8.2365e-04
Loss = 5.3961e-02, PNorm = 113.0206, GNorm = 0.6765, lr_0 = 8.2309e-04
Loss = 5.6400e-02, PNorm = 113.0920, GNorm = 0.5448, lr_0 = 8.2252e-04
Loss = 5.4965e-02, PNorm = 113.1663, GNorm = 0.3865, lr_0 = 8.2196e-04
Loss = 4.9787e-02, PNorm = 113.2369, GNorm = 0.7210, lr_0 = 8.2140e-04
Loss = 5.7602e-02, PNorm = 113.3106, GNorm = 0.3854, lr_0 = 8.2084e-04
Loss = 6.2938e-02, PNorm = 113.3844, GNorm = 0.3427, lr_0 = 8.2027e-04
Loss = 5.8666e-02, PNorm = 113.4699, GNorm = 0.6073, lr_0 = 8.1971e-04
Loss = 6.2062e-02, PNorm = 113.5603, GNorm = 0.5427, lr_0 = 8.1915e-04
Loss = 5.6350e-02, PNorm = 113.6464, GNorm = 0.2817, lr_0 = 8.1859e-04
Loss = 5.2819e-02, PNorm = 113.7328, GNorm = 0.3703, lr_0 = 8.1803e-04
Loss = 4.8989e-02, PNorm = 113.8145, GNorm = 0.3027, lr_0 = 8.1747e-04
Loss = 5.9348e-02, PNorm = 113.8885, GNorm = 0.2553, lr_0 = 8.1691e-04
Loss = 5.9850e-02, PNorm = 113.9750, GNorm = 1.0343, lr_0 = 8.1635e-04
Loss = 6.3897e-02, PNorm = 114.0601, GNorm = 0.3415, lr_0 = 8.1579e-04
Loss = 6.2194e-02, PNorm = 114.1561, GNorm = 0.9743, lr_0 = 8.1523e-04
Loss = 6.0942e-02, PNorm = 114.2390, GNorm = 0.5166, lr_0 = 8.1467e-04
Loss = 6.2853e-02, PNorm = 114.3307, GNorm = 0.3751, lr_0 = 8.1411e-04
Loss = 5.3207e-02, PNorm = 114.4323, GNorm = 0.4741, lr_0 = 8.1355e-04
Loss = 5.4313e-02, PNorm = 114.5103, GNorm = 0.6706, lr_0 = 8.1300e-04
Loss = 6.1504e-02, PNorm = 114.5924, GNorm = 0.4637, lr_0 = 8.1244e-04
Loss = 6.4307e-02, PNorm = 114.6783, GNorm = 0.3483, lr_0 = 8.1188e-04
Loss = 5.8984e-02, PNorm = 114.7562, GNorm = 0.4325, lr_0 = 8.1133e-04
Loss = 5.7950e-02, PNorm = 114.8388, GNorm = 0.7631, lr_0 = 8.1077e-04
Loss = 5.6532e-02, PNorm = 114.9235, GNorm = 0.3335, lr_0 = 8.1022e-04
Loss = 6.0519e-02, PNorm = 115.0039, GNorm = 0.4572, lr_0 = 8.0966e-04
Loss = 6.2541e-02, PNorm = 115.0974, GNorm = 0.4530, lr_0 = 8.0911e-04
Loss = 6.6988e-02, PNorm = 115.1862, GNorm = 0.4764, lr_0 = 8.0855e-04
Loss = 6.3113e-02, PNorm = 115.2726, GNorm = 0.8504, lr_0 = 8.0800e-04
Loss = 6.6745e-02, PNorm = 115.3676, GNorm = 0.5175, lr_0 = 8.0745e-04
Loss = 6.8757e-02, PNorm = 115.4653, GNorm = 0.5244, lr_0 = 8.0689e-04
Loss = 6.5172e-02, PNorm = 115.5692, GNorm = 0.4033, lr_0 = 8.0634e-04
Loss = 6.1333e-02, PNorm = 115.6550, GNorm = 0.8714, lr_0 = 8.0579e-04
Loss = 5.2730e-02, PNorm = 115.7469, GNorm = 0.7069, lr_0 = 8.0523e-04
Loss = 6.5220e-02, PNorm = 115.8405, GNorm = 0.6927, lr_0 = 8.0468e-04
Loss = 7.9483e-02, PNorm = 115.9315, GNorm = 0.3532, lr_0 = 8.0413e-04
Loss = 5.7682e-02, PNorm = 116.0293, GNorm = 0.4932, lr_0 = 8.0358e-04
Loss = 6.3224e-02, PNorm = 116.1297, GNorm = 0.7039, lr_0 = 8.0303e-04
Loss = 6.9210e-02, PNorm = 116.2132, GNorm = 1.5355, lr_0 = 8.0248e-04
Loss = 5.5229e-02, PNorm = 116.3103, GNorm = 0.6277, lr_0 = 8.0193e-04
Loss = 6.6424e-02, PNorm = 116.4026, GNorm = 0.3617, lr_0 = 8.0138e-04
Loss = 5.8972e-02, PNorm = 116.4972, GNorm = 0.2908, lr_0 = 8.0083e-04
Loss = 7.2429e-02, PNorm = 116.5901, GNorm = 0.4535, lr_0 = 8.0028e-04
Loss = 6.0297e-02, PNorm = 116.6887, GNorm = 0.3810, lr_0 = 7.9974e-04
Loss = 6.7703e-02, PNorm = 116.7804, GNorm = 0.3402, lr_0 = 7.9919e-04
Loss = 5.4297e-02, PNorm = 116.8748, GNorm = 0.3570, lr_0 = 7.9864e-04
Loss = 6.1181e-02, PNorm = 116.9594, GNorm = 0.4934, lr_0 = 7.9809e-04
Loss = 7.0237e-02, PNorm = 117.0364, GNorm = 0.5626, lr_0 = 7.9755e-04
Loss = 5.8285e-02, PNorm = 117.1328, GNorm = 0.6324, lr_0 = 7.9700e-04
Loss = 6.5365e-02, PNorm = 117.2114, GNorm = 0.7774, lr_0 = 7.9645e-04
Loss = 6.3506e-02, PNorm = 117.3184, GNorm = 0.7733, lr_0 = 7.9591e-04
Loss = 7.1520e-02, PNorm = 117.4057, GNorm = 0.5535, lr_0 = 7.9536e-04
Loss = 6.8386e-02, PNorm = 117.5138, GNorm = 0.9933, lr_0 = 7.9482e-04
Loss = 6.9765e-02, PNorm = 117.6080, GNorm = 0.9247, lr_0 = 7.9427e-04
Loss = 6.7912e-02, PNorm = 117.7147, GNorm = 0.4348, lr_0 = 7.9373e-04
Loss = 6.3737e-02, PNorm = 117.7991, GNorm = 0.6125, lr_0 = 7.9319e-04
Loss = 6.2967e-02, PNorm = 117.8952, GNorm = 0.8650, lr_0 = 7.9264e-04
Loss = 7.0803e-02, PNorm = 117.9900, GNorm = 0.7566, lr_0 = 7.9210e-04
Loss = 6.6528e-02, PNorm = 118.0944, GNorm = 0.9185, lr_0 = 7.9156e-04
Loss = 6.1530e-02, PNorm = 118.1947, GNorm = 0.3711, lr_0 = 7.9101e-04
Loss = 6.3418e-02, PNorm = 118.2903, GNorm = 0.5685, lr_0 = 7.9047e-04
Loss = 6.7906e-02, PNorm = 118.3947, GNorm = 0.4151, lr_0 = 7.8993e-04
Loss = 6.4545e-02, PNorm = 118.4944, GNorm = 0.3883, lr_0 = 7.8939e-04
Loss = 7.4946e-02, PNorm = 118.6015, GNorm = 0.5848, lr_0 = 7.8885e-04
Loss = 7.2236e-02, PNorm = 118.7125, GNorm = 0.4119, lr_0 = 7.8831e-04
Loss = 6.8137e-02, PNorm = 118.8184, GNorm = 0.3747, lr_0 = 7.8777e-04
Loss = 6.7722e-02, PNorm = 118.9248, GNorm = 0.4033, lr_0 = 7.8723e-04
Loss = 5.7281e-02, PNorm = 119.0292, GNorm = 0.3757, lr_0 = 7.8669e-04
Loss = 6.6646e-02, PNorm = 119.1242, GNorm = 0.6178, lr_0 = 7.8615e-04
Loss = 6.3633e-02, PNorm = 119.2190, GNorm = 0.8225, lr_0 = 7.8561e-04
Loss = 6.7772e-02, PNorm = 119.3146, GNorm = 0.4743, lr_0 = 7.8507e-04
Loss = 7.1025e-02, PNorm = 119.4248, GNorm = 0.8379, lr_0 = 7.8454e-04
Loss = 6.8408e-02, PNorm = 119.5335, GNorm = 0.7870, lr_0 = 7.8400e-04
Loss = 6.6391e-02, PNorm = 119.6333, GNorm = 0.5535, lr_0 = 7.8346e-04
Loss = 7.0461e-02, PNorm = 119.7248, GNorm = 0.3130, lr_0 = 7.8293e-04
Loss = 7.6327e-02, PNorm = 119.8197, GNorm = 0.3884, lr_0 = 7.8239e-04
Loss = 6.8012e-02, PNorm = 119.9208, GNorm = 0.6574, lr_0 = 7.8185e-04
Loss = 6.6958e-02, PNorm = 120.0273, GNorm = 0.5943, lr_0 = 7.8132e-04
Validation mae = 0.293169
Epoch 5
Loss = 4.7631e-02, PNorm = 120.1161, GNorm = 0.5654, lr_0 = 7.8078e-04
Loss = 4.4630e-02, PNorm = 120.1913, GNorm = 0.6444, lr_0 = 7.8025e-04
Loss = 4.9887e-02, PNorm = 120.2593, GNorm = 0.5799, lr_0 = 7.7971e-04
Loss = 4.4860e-02, PNorm = 120.3346, GNorm = 0.5432, lr_0 = 7.7918e-04
Loss = 4.9719e-02, PNorm = 120.4052, GNorm = 1.1268, lr_0 = 7.7864e-04
Loss = 5.7642e-02, PNorm = 120.4794, GNorm = 1.0912, lr_0 = 7.7811e-04
Loss = 4.9342e-02, PNorm = 120.5529, GNorm = 0.3087, lr_0 = 7.7758e-04
Loss = 4.3564e-02, PNorm = 120.6203, GNorm = 0.6237, lr_0 = 7.7705e-04
Loss = 4.2305e-02, PNorm = 120.6773, GNorm = 0.5244, lr_0 = 7.7651e-04
Loss = 4.1901e-02, PNorm = 120.7388, GNorm = 0.4244, lr_0 = 7.7598e-04
Loss = 3.9620e-02, PNorm = 120.8019, GNorm = 0.2354, lr_0 = 7.7545e-04
Loss = 4.2264e-02, PNorm = 120.8652, GNorm = 0.4394, lr_0 = 7.7492e-04
Loss = 4.2912e-02, PNorm = 120.9371, GNorm = 0.4767, lr_0 = 7.7439e-04
Loss = 5.0120e-02, PNorm = 120.9949, GNorm = 0.2310, lr_0 = 7.7386e-04
Loss = 4.8036e-02, PNorm = 121.0714, GNorm = 0.5710, lr_0 = 7.7333e-04
Loss = 3.9731e-02, PNorm = 121.1328, GNorm = 0.4020, lr_0 = 7.7280e-04
Loss = 4.2218e-02, PNorm = 121.1981, GNorm = 0.3078, lr_0 = 7.7227e-04
Loss = 4.0301e-02, PNorm = 121.2570, GNorm = 0.2776, lr_0 = 7.7174e-04
Loss = 4.0788e-02, PNorm = 121.3098, GNorm = 0.3822, lr_0 = 7.7121e-04
Loss = 4.4354e-02, PNorm = 121.3726, GNorm = 0.5044, lr_0 = 7.7068e-04
Loss = 3.8278e-02, PNorm = 121.4382, GNorm = 0.5705, lr_0 = 7.7015e-04
Loss = 3.5751e-02, PNorm = 121.4934, GNorm = 0.3811, lr_0 = 7.6963e-04
Loss = 4.2992e-02, PNorm = 121.5484, GNorm = 0.4875, lr_0 = 7.6910e-04
Loss = 4.6089e-02, PNorm = 121.6100, GNorm = 0.3878, lr_0 = 7.6857e-04
Loss = 3.8082e-02, PNorm = 121.6709, GNorm = 0.2904, lr_0 = 7.6805e-04
Loss = 3.9154e-02, PNorm = 121.7352, GNorm = 0.3714, lr_0 = 7.6752e-04
Loss = 3.5120e-02, PNorm = 121.7911, GNorm = 0.3011, lr_0 = 7.6699e-04
Loss = 4.1874e-02, PNorm = 121.8489, GNorm = 0.6347, lr_0 = 7.6647e-04
Loss = 4.1201e-02, PNorm = 121.9102, GNorm = 0.5295, lr_0 = 7.6594e-04
Loss = 3.9849e-02, PNorm = 121.9645, GNorm = 0.3001, lr_0 = 7.6542e-04
Loss = 4.1372e-02, PNorm = 122.0203, GNorm = 0.2948, lr_0 = 7.6489e-04
Loss = 4.8875e-02, PNorm = 122.0867, GNorm = 0.4016, lr_0 = 7.6437e-04
Loss = 3.8925e-02, PNorm = 122.1588, GNorm = 0.6668, lr_0 = 7.6385e-04
Loss = 3.7027e-02, PNorm = 122.2324, GNorm = 0.3023, lr_0 = 7.6332e-04
Loss = 4.6861e-02, PNorm = 122.2989, GNorm = 0.4936, lr_0 = 7.6280e-04
Loss = 4.2574e-02, PNorm = 122.3694, GNorm = 0.5320, lr_0 = 7.6228e-04
Loss = 5.0097e-02, PNorm = 122.4356, GNorm = 1.0382, lr_0 = 7.6176e-04
Loss = 4.4283e-02, PNorm = 122.4945, GNorm = 0.3756, lr_0 = 7.6123e-04
Loss = 4.3627e-02, PNorm = 122.5614, GNorm = 0.2519, lr_0 = 7.6071e-04
Loss = 3.7474e-02, PNorm = 122.6235, GNorm = 0.4069, lr_0 = 7.6019e-04
Loss = 4.1971e-02, PNorm = 122.6805, GNorm = 0.9479, lr_0 = 7.5967e-04
Loss = 4.4331e-02, PNorm = 122.7460, GNorm = 0.8525, lr_0 = 7.5915e-04
Loss = 4.1376e-02, PNorm = 122.8051, GNorm = 0.5696, lr_0 = 7.5863e-04
Loss = 4.4470e-02, PNorm = 122.8665, GNorm = 0.7544, lr_0 = 7.5811e-04
Loss = 3.8038e-02, PNorm = 122.9357, GNorm = 0.3803, lr_0 = 7.5759e-04
Loss = 3.9977e-02, PNorm = 123.0057, GNorm = 0.4581, lr_0 = 7.5707e-04
Loss = 5.0805e-02, PNorm = 123.0828, GNorm = 0.5889, lr_0 = 7.5655e-04
Loss = 4.3663e-02, PNorm = 123.1543, GNorm = 0.2958, lr_0 = 7.5603e-04
Loss = 4.2261e-02, PNorm = 123.2301, GNorm = 0.3228, lr_0 = 7.5552e-04
Loss = 4.1711e-02, PNorm = 123.3072, GNorm = 0.4827, lr_0 = 7.5500e-04
Loss = 4.4734e-02, PNorm = 123.3780, GNorm = 0.3242, lr_0 = 7.5448e-04
Loss = 3.9425e-02, PNorm = 123.4475, GNorm = 0.5231, lr_0 = 7.5397e-04
Loss = 4.1674e-02, PNorm = 123.5130, GNorm = 1.2683, lr_0 = 7.5345e-04
Loss = 4.3586e-02, PNorm = 123.5825, GNorm = 0.5797, lr_0 = 7.5293e-04
Loss = 4.5223e-02, PNorm = 123.6580, GNorm = 0.3682, lr_0 = 7.5242e-04
Loss = 4.5528e-02, PNorm = 123.7361, GNorm = 0.4096, lr_0 = 7.5190e-04
Loss = 4.4896e-02, PNorm = 123.8045, GNorm = 0.7508, lr_0 = 7.5139e-04
Loss = 4.3889e-02, PNorm = 123.8770, GNorm = 0.3816, lr_0 = 7.5087e-04
Loss = 4.1611e-02, PNorm = 123.9517, GNorm = 0.6232, lr_0 = 7.5036e-04
Loss = 4.7406e-02, PNorm = 124.0218, GNorm = 0.3981, lr_0 = 7.4984e-04
Loss = 4.0854e-02, PNorm = 124.0996, GNorm = 0.3053, lr_0 = 7.4933e-04
Loss = 3.7751e-02, PNorm = 124.1668, GNorm = 0.5714, lr_0 = 7.4882e-04
Loss = 4.7217e-02, PNorm = 124.2362, GNorm = 0.4908, lr_0 = 7.4830e-04
Loss = 5.2190e-02, PNorm = 124.3130, GNorm = 0.3839, lr_0 = 7.4779e-04
Loss = 4.5835e-02, PNorm = 124.3861, GNorm = 0.5893, lr_0 = 7.4728e-04
Loss = 4.2267e-02, PNorm = 124.4655, GNorm = 0.7501, lr_0 = 7.4677e-04
Loss = 4.5141e-02, PNorm = 124.5463, GNorm = 1.0247, lr_0 = 7.4625e-04
Loss = 4.9647e-02, PNorm = 124.6338, GNorm = 0.4644, lr_0 = 7.4574e-04
Loss = 5.4568e-02, PNorm = 124.7098, GNorm = 1.1936, lr_0 = 7.4523e-04
Loss = 4.3711e-02, PNorm = 124.7981, GNorm = 0.5878, lr_0 = 7.4472e-04
Loss = 4.2397e-02, PNorm = 124.8710, GNorm = 0.6818, lr_0 = 7.4421e-04
Loss = 4.1624e-02, PNorm = 124.9396, GNorm = 0.6611, lr_0 = 7.4370e-04
Loss = 4.3726e-02, PNorm = 125.0042, GNorm = 0.3528, lr_0 = 7.4319e-04
Loss = 5.0263e-02, PNorm = 125.0841, GNorm = 0.3333, lr_0 = 7.4268e-04
Loss = 5.0671e-02, PNorm = 125.1654, GNorm = 0.7996, lr_0 = 7.4217e-04
Loss = 4.1151e-02, PNorm = 125.2444, GNorm = 0.4838, lr_0 = 7.4167e-04
Loss = 4.6429e-02, PNorm = 125.3187, GNorm = 0.7908, lr_0 = 7.4116e-04
Loss = 4.8459e-02, PNorm = 125.4027, GNorm = 1.1974, lr_0 = 7.4065e-04
Loss = 4.7341e-02, PNorm = 125.4767, GNorm = 0.3682, lr_0 = 7.4014e-04
Loss = 3.9720e-02, PNorm = 125.5613, GNorm = 0.3279, lr_0 = 7.3964e-04
Loss = 5.3345e-02, PNorm = 125.6388, GNorm = 0.3067, lr_0 = 7.3913e-04
Loss = 4.2596e-02, PNorm = 125.7165, GNorm = 0.2570, lr_0 = 7.3862e-04
Loss = 4.6260e-02, PNorm = 125.7933, GNorm = 0.5720, lr_0 = 7.3812e-04
Loss = 5.3532e-02, PNorm = 125.8739, GNorm = 1.0947, lr_0 = 7.3761e-04
Loss = 4.0089e-02, PNorm = 125.9462, GNorm = 0.4225, lr_0 = 7.3711e-04
Loss = 4.3247e-02, PNorm = 126.0255, GNorm = 1.8815, lr_0 = 7.3660e-04
Loss = 4.4131e-02, PNorm = 126.0968, GNorm = 0.3495, lr_0 = 7.3610e-04
Loss = 4.9541e-02, PNorm = 126.1745, GNorm = 0.7777, lr_0 = 7.3559e-04
Loss = 4.9545e-02, PNorm = 126.2615, GNorm = 0.7383, lr_0 = 7.3509e-04
Loss = 4.6014e-02, PNorm = 126.3489, GNorm = 0.3033, lr_0 = 7.3458e-04
Loss = 4.1937e-02, PNorm = 126.4317, GNorm = 0.3693, lr_0 = 7.3408e-04
Loss = 4.5206e-02, PNorm = 126.5048, GNorm = 0.3394, lr_0 = 7.3358e-04
Loss = 5.2288e-02, PNorm = 126.5731, GNorm = 0.4852, lr_0 = 7.3308e-04
Loss = 4.2971e-02, PNorm = 126.6477, GNorm = 0.4303, lr_0 = 7.3257e-04
Loss = 5.1276e-02, PNorm = 126.7255, GNorm = 0.3563, lr_0 = 7.3207e-04
Loss = 5.0910e-02, PNorm = 126.8156, GNorm = 0.9121, lr_0 = 7.3157e-04
Loss = 4.6731e-02, PNorm = 126.8943, GNorm = 0.9252, lr_0 = 7.3107e-04
Loss = 5.3882e-02, PNorm = 126.9780, GNorm = 0.5071, lr_0 = 7.3057e-04
Loss = 4.7000e-02, PNorm = 127.0613, GNorm = 0.7944, lr_0 = 7.3007e-04
Loss = 4.6539e-02, PNorm = 127.1454, GNorm = 1.2094, lr_0 = 7.2957e-04
Loss = 4.0914e-02, PNorm = 127.2203, GNorm = 0.3642, lr_0 = 7.2907e-04
Loss = 6.0138e-02, PNorm = 127.2990, GNorm = 0.8129, lr_0 = 7.2857e-04
Loss = 4.3965e-02, PNorm = 127.3751, GNorm = 0.4381, lr_0 = 7.2807e-04
Loss = 5.2750e-02, PNorm = 127.4600, GNorm = 0.5545, lr_0 = 7.2757e-04
Loss = 4.4442e-02, PNorm = 127.5428, GNorm = 0.7858, lr_0 = 7.2707e-04
Loss = 4.8821e-02, PNorm = 127.6301, GNorm = 0.2449, lr_0 = 7.2657e-04
Loss = 4.5529e-02, PNorm = 127.7129, GNorm = 0.2496, lr_0 = 7.2608e-04
Loss = 4.3456e-02, PNorm = 127.7910, GNorm = 0.2801, lr_0 = 7.2558e-04
Loss = 4.7967e-02, PNorm = 127.8692, GNorm = 0.6833, lr_0 = 7.2508e-04
Loss = 4.1014e-02, PNorm = 127.9498, GNorm = 0.4971, lr_0 = 7.2458e-04
Loss = 6.5533e-02, PNorm = 128.0201, GNorm = 1.2665, lr_0 = 7.2409e-04
Loss = 5.0952e-02, PNorm = 128.1006, GNorm = 0.6082, lr_0 = 7.2359e-04
Loss = 5.1572e-02, PNorm = 128.1788, GNorm = 0.5861, lr_0 = 7.2310e-04
Loss = 4.2898e-02, PNorm = 128.2604, GNorm = 0.4568, lr_0 = 7.2260e-04
Loss = 4.9531e-02, PNorm = 128.3467, GNorm = 0.3447, lr_0 = 7.2211e-04
Loss = 4.9500e-02, PNorm = 128.4243, GNorm = 0.3299, lr_0 = 7.2161e-04
Loss = 5.4362e-02, PNorm = 128.5101, GNorm = 0.6491, lr_0 = 7.2112e-04
Loss = 4.5199e-02, PNorm = 128.5962, GNorm = 0.3203, lr_0 = 7.2062e-04
Loss = 5.1032e-02, PNorm = 128.6819, GNorm = 1.0890, lr_0 = 7.2013e-04
Loss = 4.4585e-02, PNorm = 128.7611, GNorm = 0.3570, lr_0 = 7.1964e-04
Validation mae = 0.286076
Epoch 6
Loss = 4.2704e-02, PNorm = 128.8215, GNorm = 0.6071, lr_0 = 7.1914e-04
Loss = 3.5126e-02, PNorm = 128.8860, GNorm = 1.3000, lr_0 = 7.1865e-04
Loss = 3.6829e-02, PNorm = 128.9381, GNorm = 0.2398, lr_0 = 7.1816e-04
Loss = 3.0686e-02, PNorm = 128.9902, GNorm = 0.4682, lr_0 = 7.1767e-04
Loss = 3.7011e-02, PNorm = 129.0392, GNorm = 0.3714, lr_0 = 7.1717e-04
Loss = 3.0040e-02, PNorm = 129.0886, GNorm = 0.3303, lr_0 = 7.1668e-04
Loss = 3.6596e-02, PNorm = 129.1424, GNorm = 0.2984, lr_0 = 7.1619e-04
Loss = 3.7071e-02, PNorm = 129.1885, GNorm = 0.6519, lr_0 = 7.1570e-04
Loss = 3.0768e-02, PNorm = 129.2411, GNorm = 1.0889, lr_0 = 7.1521e-04
Loss = 3.3450e-02, PNorm = 129.2904, GNorm = 0.4785, lr_0 = 7.1472e-04
Loss = 3.6808e-02, PNorm = 129.3438, GNorm = 0.2133, lr_0 = 7.1423e-04
Loss = 3.2349e-02, PNorm = 129.3963, GNorm = 0.4755, lr_0 = 7.1374e-04
Loss = 4.0098e-02, PNorm = 129.4558, GNorm = 0.4111, lr_0 = 7.1325e-04
Loss = 2.9249e-02, PNorm = 129.5110, GNorm = 0.6654, lr_0 = 7.1277e-04
Loss = 3.8687e-02, PNorm = 129.5608, GNorm = 0.3914, lr_0 = 7.1228e-04
Loss = 3.1545e-02, PNorm = 129.6053, GNorm = 0.2749, lr_0 = 7.1179e-04
Loss = 3.9675e-02, PNorm = 129.6571, GNorm = 0.5189, lr_0 = 7.1130e-04
Loss = 3.0038e-02, PNorm = 129.7172, GNorm = 0.3082, lr_0 = 7.1081e-04
Loss = 3.1307e-02, PNorm = 129.7759, GNorm = 0.4945, lr_0 = 7.1033e-04
Loss = 2.9225e-02, PNorm = 129.8285, GNorm = 0.4270, lr_0 = 7.0984e-04
Loss = 3.5229e-02, PNorm = 129.8833, GNorm = 0.7257, lr_0 = 7.0935e-04
Loss = 3.7172e-02, PNorm = 129.9426, GNorm = 0.3652, lr_0 = 7.0887e-04
Loss = 2.9193e-02, PNorm = 129.9950, GNorm = 0.2478, lr_0 = 7.0838e-04
Loss = 2.9423e-02, PNorm = 130.0463, GNorm = 0.2539, lr_0 = 7.0790e-04
Loss = 2.9302e-02, PNorm = 130.0960, GNorm = 0.1986, lr_0 = 7.0741e-04
Loss = 3.2517e-02, PNorm = 130.1447, GNorm = 0.5940, lr_0 = 7.0693e-04
Loss = 2.8020e-02, PNorm = 130.1938, GNorm = 0.5050, lr_0 = 7.0644e-04
Loss = 2.9973e-02, PNorm = 130.2375, GNorm = 0.2738, lr_0 = 7.0596e-04
Loss = 3.0136e-02, PNorm = 130.2863, GNorm = 0.4654, lr_0 = 7.0548e-04
Loss = 3.4078e-02, PNorm = 130.3334, GNorm = 0.4271, lr_0 = 7.0499e-04
Loss = 3.1191e-02, PNorm = 130.3853, GNorm = 0.7219, lr_0 = 7.0451e-04
Loss = 3.3131e-02, PNorm = 130.4340, GNorm = 0.5407, lr_0 = 7.0403e-04
Loss = 2.9720e-02, PNorm = 130.4965, GNorm = 0.3502, lr_0 = 7.0354e-04
Loss = 3.1932e-02, PNorm = 130.5446, GNorm = 0.5790, lr_0 = 7.0306e-04
Loss = 2.8144e-02, PNorm = 130.6045, GNorm = 0.2332, lr_0 = 7.0258e-04
Loss = 3.0940e-02, PNorm = 130.6514, GNorm = 0.6466, lr_0 = 7.0210e-04
Loss = 3.2969e-02, PNorm = 130.7018, GNorm = 0.6154, lr_0 = 7.0162e-04
Loss = 3.1212e-02, PNorm = 130.7541, GNorm = 0.4107, lr_0 = 7.0114e-04
Loss = 3.0399e-02, PNorm = 130.8102, GNorm = 0.5080, lr_0 = 7.0066e-04
Loss = 3.5257e-02, PNorm = 130.8710, GNorm = 0.4424, lr_0 = 7.0018e-04
Loss = 3.2634e-02, PNorm = 130.9250, GNorm = 1.0644, lr_0 = 6.9970e-04
Loss = 3.5583e-02, PNorm = 130.9791, GNorm = 0.2551, lr_0 = 6.9922e-04
Loss = 3.3566e-02, PNorm = 131.0327, GNorm = 0.3811, lr_0 = 6.9874e-04
Loss = 3.6708e-02, PNorm = 131.0910, GNorm = 0.2328, lr_0 = 6.9826e-04
Loss = 2.7158e-02, PNorm = 131.1522, GNorm = 0.5906, lr_0 = 6.9778e-04
Loss = 3.5962e-02, PNorm = 131.2106, GNorm = 0.2308, lr_0 = 6.9730e-04
Loss = 3.2755e-02, PNorm = 131.2703, GNorm = 0.6786, lr_0 = 6.9683e-04
Loss = 2.9572e-02, PNorm = 131.3283, GNorm = 0.6712, lr_0 = 6.9635e-04
Loss = 3.3190e-02, PNorm = 131.3868, GNorm = 0.7731, lr_0 = 6.9587e-04
Loss = 3.6334e-02, PNorm = 131.4487, GNorm = 0.5420, lr_0 = 6.9540e-04
Loss = 3.2648e-02, PNorm = 131.5061, GNorm = 1.1311, lr_0 = 6.9492e-04
Loss = 2.8820e-02, PNorm = 131.5644, GNorm = 0.4274, lr_0 = 6.9444e-04
Loss = 2.5834e-02, PNorm = 131.6252, GNorm = 0.5245, lr_0 = 6.9397e-04
Loss = 3.1516e-02, PNorm = 131.6846, GNorm = 0.2908, lr_0 = 6.9349e-04
Loss = 3.2894e-02, PNorm = 131.7412, GNorm = 0.4716, lr_0 = 6.9302e-04
Loss = 3.1787e-02, PNorm = 131.7986, GNorm = 0.3841, lr_0 = 6.9254e-04
Loss = 3.4015e-02, PNorm = 131.8588, GNorm = 0.2582, lr_0 = 6.9207e-04
Loss = 3.2111e-02, PNorm = 131.9205, GNorm = 0.3274, lr_0 = 6.9159e-04
Loss = 3.3640e-02, PNorm = 131.9782, GNorm = 0.4498, lr_0 = 6.9112e-04
Loss = 4.0584e-02, PNorm = 132.0421, GNorm = 0.4688, lr_0 = 6.9065e-04
Loss = 3.1323e-02, PNorm = 132.0998, GNorm = 0.8918, lr_0 = 6.9017e-04
Loss = 3.6531e-02, PNorm = 132.1623, GNorm = 0.4314, lr_0 = 6.8970e-04
Loss = 3.6997e-02, PNorm = 132.2229, GNorm = 0.5488, lr_0 = 6.8923e-04
Loss = 3.5055e-02, PNorm = 132.2893, GNorm = 0.3591, lr_0 = 6.8876e-04
Loss = 2.9756e-02, PNorm = 132.3490, GNorm = 0.5488, lr_0 = 6.8828e-04
Loss = 3.5275e-02, PNorm = 132.4143, GNorm = 0.3691, lr_0 = 6.8781e-04
Loss = 3.3794e-02, PNorm = 132.4791, GNorm = 0.8785, lr_0 = 6.8734e-04
Loss = 3.5094e-02, PNorm = 132.5504, GNorm = 1.2359, lr_0 = 6.8687e-04
Loss = 2.7508e-02, PNorm = 132.6121, GNorm = 0.6405, lr_0 = 6.8640e-04
Loss = 3.3513e-02, PNorm = 132.6714, GNorm = 0.2909, lr_0 = 6.8593e-04
Loss = 3.5799e-02, PNorm = 132.7311, GNorm = 0.3204, lr_0 = 6.8546e-04
Loss = 4.0554e-02, PNorm = 132.8041, GNorm = 0.2406, lr_0 = 6.8499e-04
Loss = 3.2548e-02, PNorm = 132.8732, GNorm = 0.3813, lr_0 = 6.8452e-04
Loss = 3.6497e-02, PNorm = 132.9426, GNorm = 0.3160, lr_0 = 6.8405e-04
Loss = 3.3468e-02, PNorm = 133.0091, GNorm = 0.7546, lr_0 = 6.8358e-04
Loss = 3.2905e-02, PNorm = 133.0751, GNorm = 0.5463, lr_0 = 6.8312e-04
Loss = 3.7435e-02, PNorm = 133.1412, GNorm = 0.6578, lr_0 = 6.8265e-04
Loss = 3.1494e-02, PNorm = 133.2073, GNorm = 0.5067, lr_0 = 6.8218e-04
Loss = 3.6721e-02, PNorm = 133.2718, GNorm = 0.3341, lr_0 = 6.8171e-04
Loss = 3.8525e-02, PNorm = 133.3462, GNorm = 0.4280, lr_0 = 6.8125e-04
Loss = 3.6077e-02, PNorm = 133.4103, GNorm = 0.5302, lr_0 = 6.8078e-04
Loss = 3.7724e-02, PNorm = 133.4808, GNorm = 0.4742, lr_0 = 6.8031e-04
Loss = 3.0702e-02, PNorm = 133.5442, GNorm = 0.5480, lr_0 = 6.7985e-04
Loss = 3.3346e-02, PNorm = 133.6097, GNorm = 0.3623, lr_0 = 6.7938e-04
Loss = 2.9602e-02, PNorm = 133.6724, GNorm = 0.5501, lr_0 = 6.7892e-04
Loss = 3.3908e-02, PNorm = 133.7356, GNorm = 0.2983, lr_0 = 6.7845e-04
Loss = 3.3406e-02, PNorm = 133.8046, GNorm = 0.4552, lr_0 = 6.7799e-04
Loss = 3.1233e-02, PNorm = 133.8687, GNorm = 0.5106, lr_0 = 6.7752e-04
Loss = 3.2647e-02, PNorm = 133.9291, GNorm = 0.3092, lr_0 = 6.7706e-04
Loss = 3.7066e-02, PNorm = 133.9878, GNorm = 0.2285, lr_0 = 6.7659e-04
Loss = 3.6933e-02, PNorm = 134.0557, GNorm = 0.3238, lr_0 = 6.7613e-04
Loss = 3.4732e-02, PNorm = 134.1284, GNorm = 0.1964, lr_0 = 6.7567e-04
Loss = 3.7198e-02, PNorm = 134.1945, GNorm = 0.6240, lr_0 = 6.7520e-04
Loss = 3.6266e-02, PNorm = 134.2731, GNorm = 0.1895, lr_0 = 6.7474e-04
Loss = 3.3969e-02, PNorm = 134.3466, GNorm = 0.7251, lr_0 = 6.7428e-04
Loss = 3.8536e-02, PNorm = 134.4139, GNorm = 0.3922, lr_0 = 6.7382e-04
Loss = 3.5036e-02, PNorm = 134.4832, GNorm = 0.4449, lr_0 = 6.7335e-04
Loss = 2.9942e-02, PNorm = 134.5493, GNorm = 0.7290, lr_0 = 6.7289e-04
Loss = 4.0988e-02, PNorm = 134.6234, GNorm = 0.2372, lr_0 = 6.7243e-04
Loss = 3.8316e-02, PNorm = 134.6986, GNorm = 0.3573, lr_0 = 6.7197e-04
Loss = 3.2719e-02, PNorm = 134.7686, GNorm = 0.3177, lr_0 = 6.7151e-04
Loss = 3.6038e-02, PNorm = 134.8349, GNorm = 0.6560, lr_0 = 6.7105e-04
Loss = 2.8858e-02, PNorm = 134.9007, GNorm = 0.3644, lr_0 = 6.7059e-04
Loss = 3.2084e-02, PNorm = 134.9624, GNorm = 0.6605, lr_0 = 6.7013e-04
Loss = 3.3629e-02, PNorm = 135.0246, GNorm = 0.4172, lr_0 = 6.6967e-04
Loss = 3.4489e-02, PNorm = 135.0912, GNorm = 0.2831, lr_0 = 6.6921e-04
Loss = 3.2463e-02, PNorm = 135.1600, GNorm = 0.4060, lr_0 = 6.6876e-04
Loss = 3.5024e-02, PNorm = 135.2299, GNorm = 0.2482, lr_0 = 6.6830e-04
Loss = 3.9937e-02, PNorm = 135.2956, GNorm = 0.3469, lr_0 = 6.6784e-04
Loss = 4.0795e-02, PNorm = 135.3661, GNorm = 0.5845, lr_0 = 6.6738e-04
Loss = 3.6555e-02, PNorm = 135.4405, GNorm = 0.3661, lr_0 = 6.6693e-04
Loss = 4.0978e-02, PNorm = 135.5231, GNorm = 0.2887, lr_0 = 6.6647e-04
Loss = 3.4063e-02, PNorm = 135.5992, GNorm = 0.3150, lr_0 = 6.6601e-04
Loss = 4.3079e-02, PNorm = 135.6751, GNorm = 0.6567, lr_0 = 6.6556e-04
Loss = 4.1360e-02, PNorm = 135.7525, GNorm = 0.4515, lr_0 = 6.6510e-04
Loss = 3.6604e-02, PNorm = 135.8304, GNorm = 0.4796, lr_0 = 6.6464e-04
Loss = 3.6168e-02, PNorm = 135.9041, GNorm = 0.5385, lr_0 = 6.6419e-04
Loss = 3.9026e-02, PNorm = 135.9846, GNorm = 0.4424, lr_0 = 6.6373e-04
Loss = 4.1813e-02, PNorm = 136.0572, GNorm = 0.7049, lr_0 = 6.6328e-04
Loss = 3.4981e-02, PNorm = 136.1331, GNorm = 0.6271, lr_0 = 6.6282e-04
Validation mae = 0.287716
Epoch 7
Loss = 2.9209e-02, PNorm = 136.1992, GNorm = 0.2829, lr_0 = 6.6237e-04
Loss = 2.4466e-02, PNorm = 136.2528, GNorm = 0.7802, lr_0 = 6.6192e-04
Loss = 2.7345e-02, PNorm = 136.3010, GNorm = 0.2773, lr_0 = 6.6146e-04
Loss = 3.0311e-02, PNorm = 136.3508, GNorm = 0.7792, lr_0 = 6.6101e-04
Loss = 2.5268e-02, PNorm = 136.3995, GNorm = 0.3344, lr_0 = 6.6056e-04
Loss = 2.7036e-02, PNorm = 136.4440, GNorm = 0.8174, lr_0 = 6.6011e-04
Loss = 2.8299e-02, PNorm = 136.4935, GNorm = 0.2867, lr_0 = 6.5965e-04
Loss = 2.6414e-02, PNorm = 136.5396, GNorm = 0.2803, lr_0 = 6.5920e-04
Loss = 2.7750e-02, PNorm = 136.5915, GNorm = 0.4419, lr_0 = 6.5875e-04
Loss = 2.4412e-02, PNorm = 136.6367, GNorm = 0.4083, lr_0 = 6.5830e-04
Loss = 2.3881e-02, PNorm = 136.6802, GNorm = 0.3359, lr_0 = 6.5785e-04
Loss = 2.6731e-02, PNorm = 136.7275, GNorm = 0.4735, lr_0 = 6.5740e-04
Loss = 2.6077e-02, PNorm = 136.7770, GNorm = 0.2118, lr_0 = 6.5695e-04
Loss = 3.2024e-02, PNorm = 136.8203, GNorm = 0.5035, lr_0 = 6.5650e-04
Loss = 2.4897e-02, PNorm = 136.8631, GNorm = 0.3863, lr_0 = 6.5605e-04
Loss = 3.3095e-02, PNorm = 136.9083, GNorm = 0.4123, lr_0 = 6.5560e-04
Loss = 2.6106e-02, PNorm = 136.9548, GNorm = 0.4055, lr_0 = 6.5515e-04
Loss = 2.6322e-02, PNorm = 137.0086, GNorm = 0.5683, lr_0 = 6.5470e-04
Loss = 2.6278e-02, PNorm = 137.0564, GNorm = 0.6123, lr_0 = 6.5425e-04
Loss = 2.6428e-02, PNorm = 137.1092, GNorm = 0.3467, lr_0 = 6.5380e-04
Loss = 2.3972e-02, PNorm = 137.1549, GNorm = 0.2784, lr_0 = 6.5335e-04
Loss = 2.3863e-02, PNorm = 137.1967, GNorm = 0.5200, lr_0 = 6.5291e-04
Loss = 2.8584e-02, PNorm = 137.2421, GNorm = 0.4261, lr_0 = 6.5246e-04
Loss = 2.3937e-02, PNorm = 137.2869, GNorm = 0.8521, lr_0 = 6.5201e-04
Loss = 2.9905e-02, PNorm = 137.3361, GNorm = 0.3611, lr_0 = 6.5157e-04
Loss = 2.1791e-02, PNorm = 137.3884, GNorm = 0.2668, lr_0 = 6.5112e-04
Loss = 2.5083e-02, PNorm = 137.4391, GNorm = 0.2513, lr_0 = 6.5067e-04
Loss = 2.5758e-02, PNorm = 137.4843, GNorm = 0.3383, lr_0 = 6.5023e-04
Loss = 2.4434e-02, PNorm = 137.5340, GNorm = 0.2724, lr_0 = 6.4978e-04
Loss = 2.8113e-02, PNorm = 137.5767, GNorm = 0.3856, lr_0 = 6.4934e-04
Loss = 2.8570e-02, PNorm = 137.6291, GNorm = 1.2802, lr_0 = 6.4889e-04
Loss = 2.5566e-02, PNorm = 137.6790, GNorm = 0.2830, lr_0 = 6.4845e-04
Loss = 2.9336e-02, PNorm = 137.7367, GNorm = 0.1602, lr_0 = 6.4800e-04
Loss = 2.6621e-02, PNorm = 137.7931, GNorm = 0.4894, lr_0 = 6.4756e-04
Loss = 2.2906e-02, PNorm = 137.8472, GNorm = 0.3158, lr_0 = 6.4712e-04
Loss = 2.2816e-02, PNorm = 137.9006, GNorm = 0.4475, lr_0 = 6.4667e-04
Loss = 2.4030e-02, PNorm = 137.9498, GNorm = 0.3778, lr_0 = 6.4623e-04
Loss = 2.7058e-02, PNorm = 137.9960, GNorm = 0.3361, lr_0 = 6.4579e-04
Loss = 2.2340e-02, PNorm = 138.0473, GNorm = 0.3999, lr_0 = 6.4534e-04
Loss = 2.7446e-02, PNorm = 138.0953, GNorm = 0.3991, lr_0 = 6.4490e-04
Loss = 3.0565e-02, PNorm = 138.1474, GNorm = 0.5905, lr_0 = 6.4446e-04
Loss = 2.5551e-02, PNorm = 138.1955, GNorm = 0.4080, lr_0 = 6.4402e-04
Loss = 2.4669e-02, PNorm = 138.2496, GNorm = 0.3234, lr_0 = 6.4358e-04
Loss = 1.9929e-02, PNorm = 138.3018, GNorm = 0.4360, lr_0 = 6.4314e-04
Loss = 2.9845e-02, PNorm = 138.3509, GNorm = 0.5509, lr_0 = 6.4270e-04
Loss = 2.1235e-02, PNorm = 138.3956, GNorm = 0.4073, lr_0 = 6.4226e-04
Loss = 2.5025e-02, PNorm = 138.4449, GNorm = 0.7149, lr_0 = 6.4182e-04
Loss = 2.4284e-02, PNorm = 138.4941, GNorm = 0.4094, lr_0 = 6.4138e-04
Loss = 2.5170e-02, PNorm = 138.5419, GNorm = 0.5741, lr_0 = 6.4094e-04
Loss = 2.8353e-02, PNorm = 138.5935, GNorm = 0.7015, lr_0 = 6.4050e-04
Loss = 3.0605e-02, PNorm = 138.6516, GNorm = 0.4486, lr_0 = 6.4006e-04
Loss = 2.0668e-02, PNorm = 138.7072, GNorm = 0.2369, lr_0 = 6.3962e-04
Loss = 2.4353e-02, PNorm = 138.7511, GNorm = 0.3050, lr_0 = 6.3918e-04
Loss = 2.5024e-02, PNorm = 138.7985, GNorm = 0.2812, lr_0 = 6.3874e-04
Loss = 2.7794e-02, PNorm = 138.8456, GNorm = 0.3427, lr_0 = 6.3831e-04
Loss = 2.5196e-02, PNorm = 138.8965, GNorm = 0.7216, lr_0 = 6.3787e-04
Loss = 2.5718e-02, PNorm = 138.9479, GNorm = 0.3668, lr_0 = 6.3743e-04
Loss = 2.4342e-02, PNorm = 139.0005, GNorm = 0.3022, lr_0 = 6.3700e-04
Loss = 2.4057e-02, PNorm = 139.0478, GNorm = 0.2119, lr_0 = 6.3656e-04
Loss = 2.5494e-02, PNorm = 139.1010, GNorm = 0.3137, lr_0 = 6.3612e-04
Loss = 2.2880e-02, PNorm = 139.1481, GNorm = 0.1783, lr_0 = 6.3569e-04
Loss = 2.7015e-02, PNorm = 139.1948, GNorm = 0.7556, lr_0 = 6.3525e-04
Loss = 2.8731e-02, PNorm = 139.2458, GNorm = 0.2946, lr_0 = 6.3482e-04
Loss = 2.8991e-02, PNorm = 139.2918, GNorm = 1.1326, lr_0 = 6.3438e-04
Loss = 3.3338e-02, PNorm = 139.3460, GNorm = 0.7346, lr_0 = 6.3395e-04
Loss = 3.3733e-02, PNorm = 139.4075, GNorm = 0.2663, lr_0 = 6.3351e-04
Loss = 2.6974e-02, PNorm = 139.4638, GNorm = 0.5333, lr_0 = 6.3308e-04
Loss = 2.5454e-02, PNorm = 139.5221, GNorm = 0.2612, lr_0 = 6.3265e-04
Loss = 2.8107e-02, PNorm = 139.5790, GNorm = 0.5162, lr_0 = 6.3221e-04
Loss = 2.5250e-02, PNorm = 139.6361, GNorm = 0.1809, lr_0 = 6.3178e-04
Loss = 2.2970e-02, PNorm = 139.6928, GNorm = 0.2715, lr_0 = 6.3135e-04
Loss = 2.7516e-02, PNorm = 139.7496, GNorm = 0.7461, lr_0 = 6.3091e-04
Loss = 2.7614e-02, PNorm = 139.8095, GNorm = 0.4440, lr_0 = 6.3048e-04
Loss = 2.7758e-02, PNorm = 139.8698, GNorm = 0.4698, lr_0 = 6.3005e-04
Loss = 2.6655e-02, PNorm = 139.9312, GNorm = 0.5331, lr_0 = 6.2962e-04
Loss = 2.6550e-02, PNorm = 139.9889, GNorm = 0.5289, lr_0 = 6.2919e-04
Loss = 3.2306e-02, PNorm = 140.0470, GNorm = 0.4470, lr_0 = 6.2876e-04
Loss = 2.9469e-02, PNorm = 140.1112, GNorm = 0.3952, lr_0 = 6.2833e-04
Loss = 2.4572e-02, PNorm = 140.1678, GNorm = 0.2773, lr_0 = 6.2789e-04
Loss = 2.5043e-02, PNorm = 140.2313, GNorm = 0.6537, lr_0 = 6.2746e-04
Loss = 3.0087e-02, PNorm = 140.2854, GNorm = 0.6034, lr_0 = 6.2703e-04
Loss = 2.8954e-02, PNorm = 140.3437, GNorm = 0.3331, lr_0 = 6.2661e-04
Loss = 2.7429e-02, PNorm = 140.4025, GNorm = 0.4436, lr_0 = 6.2618e-04
Loss = 3.2570e-02, PNorm = 140.4671, GNorm = 0.9675, lr_0 = 6.2575e-04
Loss = 3.1752e-02, PNorm = 140.5339, GNorm = 0.4989, lr_0 = 6.2532e-04
Loss = 2.9109e-02, PNorm = 140.6021, GNorm = 1.0374, lr_0 = 6.2489e-04
Loss = 2.7249e-02, PNorm = 140.6589, GNorm = 0.4596, lr_0 = 6.2446e-04
Loss = 3.1850e-02, PNorm = 140.7166, GNorm = 0.4607, lr_0 = 6.2403e-04
Loss = 2.6715e-02, PNorm = 140.7797, GNorm = 0.4568, lr_0 = 6.2361e-04
Loss = 2.4938e-02, PNorm = 140.8422, GNorm = 0.5401, lr_0 = 6.2318e-04
Loss = 2.3338e-02, PNorm = 140.9002, GNorm = 0.2484, lr_0 = 6.2275e-04
Loss = 2.8994e-02, PNorm = 140.9554, GNorm = 0.3110, lr_0 = 6.2233e-04
Loss = 3.1339e-02, PNorm = 141.0219, GNorm = 0.5666, lr_0 = 6.2190e-04
Loss = 2.6849e-02, PNorm = 141.0833, GNorm = 0.4547, lr_0 = 6.2147e-04
Loss = 2.8094e-02, PNorm = 141.1456, GNorm = 0.4420, lr_0 = 6.2105e-04
Loss = 2.6238e-02, PNorm = 141.2013, GNorm = 0.2851, lr_0 = 6.2062e-04
Loss = 2.8370e-02, PNorm = 141.2638, GNorm = 0.4067, lr_0 = 6.2020e-04
Loss = 2.9307e-02, PNorm = 141.3236, GNorm = 0.4054, lr_0 = 6.1977e-04
Loss = 3.0052e-02, PNorm = 141.3863, GNorm = 0.3490, lr_0 = 6.1935e-04
Loss = 3.2069e-02, PNorm = 141.4450, GNorm = 0.6927, lr_0 = 6.1892e-04
Loss = 2.9698e-02, PNorm = 141.5009, GNorm = 0.3647, lr_0 = 6.1850e-04
Loss = 2.3046e-02, PNorm = 141.5609, GNorm = 0.3181, lr_0 = 6.1808e-04
Loss = 2.6823e-02, PNorm = 141.6149, GNorm = 0.5244, lr_0 = 6.1765e-04
Loss = 3.2892e-02, PNorm = 141.6765, GNorm = 0.2946, lr_0 = 6.1723e-04
Loss = 2.8214e-02, PNorm = 141.7385, GNorm = 0.2176, lr_0 = 6.1681e-04
Loss = 2.7981e-02, PNorm = 141.7998, GNorm = 0.2898, lr_0 = 6.1638e-04
Loss = 2.8235e-02, PNorm = 141.8572, GNorm = 0.2046, lr_0 = 6.1596e-04
Loss = 2.5441e-02, PNorm = 141.9117, GNorm = 0.2906, lr_0 = 6.1554e-04
Loss = 3.0687e-02, PNorm = 141.9678, GNorm = 0.4422, lr_0 = 6.1512e-04
Loss = 2.7730e-02, PNorm = 142.0309, GNorm = 0.6234, lr_0 = 6.1470e-04
Loss = 2.6031e-02, PNorm = 142.0972, GNorm = 0.2468, lr_0 = 6.1428e-04
Loss = 2.7817e-02, PNorm = 142.1595, GNorm = 0.4578, lr_0 = 6.1385e-04
Loss = 2.8183e-02, PNorm = 142.2266, GNorm = 0.9584, lr_0 = 6.1343e-04
Loss = 2.6262e-02, PNorm = 142.2876, GNorm = 0.1983, lr_0 = 6.1301e-04
Loss = 2.2686e-02, PNorm = 142.3476, GNorm = 0.1879, lr_0 = 6.1259e-04
Loss = 3.2195e-02, PNorm = 142.4009, GNorm = 0.5879, lr_0 = 6.1217e-04
Loss = 3.2868e-02, PNorm = 142.4683, GNorm = 0.4442, lr_0 = 6.1175e-04
Loss = 2.6905e-02, PNorm = 142.5292, GNorm = 0.3035, lr_0 = 6.1134e-04
Loss = 2.3880e-02, PNorm = 142.5893, GNorm = 0.3791, lr_0 = 6.1092e-04
Loss = 3.0019e-02, PNorm = 142.6467, GNorm = 0.4540, lr_0 = 6.1050e-04
Validation mae = 0.284984
Epoch 8
Loss = 2.4235e-02, PNorm = 142.6906, GNorm = 0.3805, lr_0 = 6.1008e-04
Loss = 3.1029e-02, PNorm = 142.7439, GNorm = 0.5453, lr_0 = 6.0966e-04
Loss = 2.3978e-02, PNorm = 142.7871, GNorm = 0.7329, lr_0 = 6.0924e-04
Loss = 2.1764e-02, PNorm = 142.8339, GNorm = 0.2682, lr_0 = 6.0883e-04
Loss = 2.3632e-02, PNorm = 142.8721, GNorm = 0.1764, lr_0 = 6.0841e-04
Loss = 2.3991e-02, PNorm = 142.9099, GNorm = 0.2936, lr_0 = 6.0799e-04
Loss = 2.0375e-02, PNorm = 142.9470, GNorm = 0.2911, lr_0 = 6.0758e-04
Loss = 2.1827e-02, PNorm = 142.9841, GNorm = 0.3045, lr_0 = 6.0716e-04
Loss = 2.1298e-02, PNorm = 143.0193, GNorm = 0.3122, lr_0 = 6.0674e-04
Loss = 2.2346e-02, PNorm = 143.0536, GNorm = 0.1933, lr_0 = 6.0633e-04
Loss = 1.9524e-02, PNorm = 143.0920, GNorm = 0.2917, lr_0 = 6.0591e-04
Loss = 2.4208e-02, PNorm = 143.1273, GNorm = 0.3966, lr_0 = 6.0550e-04
Loss = 2.3997e-02, PNorm = 143.1685, GNorm = 0.5534, lr_0 = 6.0508e-04
Loss = 2.1986e-02, PNorm = 143.2137, GNorm = 0.3741, lr_0 = 6.0467e-04
Loss = 2.5098e-02, PNorm = 143.2621, GNorm = 0.7477, lr_0 = 6.0425e-04
Loss = 2.3349e-02, PNorm = 143.3009, GNorm = 0.3102, lr_0 = 6.0384e-04
Loss = 1.9761e-02, PNorm = 143.3420, GNorm = 0.3716, lr_0 = 6.0343e-04
Loss = 2.3031e-02, PNorm = 143.3848, GNorm = 0.3381, lr_0 = 6.0301e-04
Loss = 2.0370e-02, PNorm = 143.4253, GNorm = 0.3483, lr_0 = 6.0260e-04
Loss = 1.9970e-02, PNorm = 143.4672, GNorm = 0.4482, lr_0 = 6.0219e-04
Loss = 1.9705e-02, PNorm = 143.5102, GNorm = 0.7721, lr_0 = 6.0178e-04
Loss = 2.1252e-02, PNorm = 143.5544, GNorm = 0.5254, lr_0 = 6.0136e-04
Loss = 2.1062e-02, PNorm = 143.6044, GNorm = 0.7190, lr_0 = 6.0095e-04
Loss = 2.0843e-02, PNorm = 143.6450, GNorm = 0.6543, lr_0 = 6.0054e-04
Loss = 2.3726e-02, PNorm = 143.6854, GNorm = 0.4197, lr_0 = 6.0013e-04
Loss = 2.6237e-02, PNorm = 143.7269, GNorm = 0.4756, lr_0 = 5.9972e-04
Loss = 2.0582e-02, PNorm = 143.7769, GNorm = 0.3743, lr_0 = 5.9931e-04
Loss = 2.7125e-02, PNorm = 143.8238, GNorm = 0.6074, lr_0 = 5.9890e-04
Loss = 2.0034e-02, PNorm = 143.8659, GNorm = 0.8186, lr_0 = 5.9849e-04
Loss = 2.0016e-02, PNorm = 143.9078, GNorm = 0.1970, lr_0 = 5.9808e-04
Loss = 2.2182e-02, PNorm = 143.9445, GNorm = 0.3352, lr_0 = 5.9767e-04
Loss = 1.8233e-02, PNorm = 143.9853, GNorm = 0.1817, lr_0 = 5.9726e-04
Loss = 2.6084e-02, PNorm = 144.0215, GNorm = 0.6823, lr_0 = 5.9685e-04
Loss = 2.0728e-02, PNorm = 144.0681, GNorm = 0.6148, lr_0 = 5.9644e-04
Loss = 2.4490e-02, PNorm = 144.1121, GNorm = 0.3254, lr_0 = 5.9603e-04
Loss = 2.2285e-02, PNorm = 144.1640, GNorm = 0.6732, lr_0 = 5.9562e-04
Loss = 1.9243e-02, PNorm = 144.2094, GNorm = 0.2228, lr_0 = 5.9521e-04
Loss = 2.4392e-02, PNorm = 144.2601, GNorm = 0.2305, lr_0 = 5.9481e-04
Loss = 2.6010e-02, PNorm = 144.3153, GNorm = 0.3380, lr_0 = 5.9440e-04
Loss = 2.2882e-02, PNorm = 144.3667, GNorm = 0.6256, lr_0 = 5.9399e-04
Loss = 2.0084e-02, PNorm = 144.4155, GNorm = 0.2199, lr_0 = 5.9358e-04
Loss = 1.7232e-02, PNorm = 144.4545, GNorm = 0.2497, lr_0 = 5.9318e-04
Loss = 2.0130e-02, PNorm = 144.4977, GNorm = 1.0372, lr_0 = 5.9277e-04
Loss = 2.0883e-02, PNorm = 144.5338, GNorm = 0.2753, lr_0 = 5.9236e-04
Loss = 2.0374e-02, PNorm = 144.5781, GNorm = 0.3808, lr_0 = 5.9196e-04
Loss = 2.2541e-02, PNorm = 144.6175, GNorm = 0.4211, lr_0 = 5.9155e-04
Loss = 2.4681e-02, PNorm = 144.6649, GNorm = 0.3962, lr_0 = 5.9115e-04
Loss = 2.1521e-02, PNorm = 144.7134, GNorm = 0.1932, lr_0 = 5.9074e-04
Loss = 2.4003e-02, PNorm = 144.7613, GNorm = 0.7492, lr_0 = 5.9034e-04
Loss = 1.9610e-02, PNorm = 144.8077, GNorm = 0.3102, lr_0 = 5.8993e-04
Loss = 2.4994e-02, PNorm = 144.8527, GNorm = 0.4679, lr_0 = 5.8953e-04
Loss = 2.6222e-02, PNorm = 144.8984, GNorm = 0.3982, lr_0 = 5.8913e-04
Loss = 2.2593e-02, PNorm = 144.9438, GNorm = 0.3408, lr_0 = 5.8872e-04
Loss = 2.2461e-02, PNorm = 144.9886, GNorm = 0.1672, lr_0 = 5.8832e-04
Loss = 2.3900e-02, PNorm = 145.0323, GNorm = 0.3709, lr_0 = 5.8792e-04
Loss = 2.0861e-02, PNorm = 145.0822, GNorm = 0.3331, lr_0 = 5.8751e-04
Loss = 2.2729e-02, PNorm = 145.1268, GNorm = 0.3970, lr_0 = 5.8711e-04
Loss = 2.2588e-02, PNorm = 145.1791, GNorm = 0.5569, lr_0 = 5.8671e-04
Loss = 2.2572e-02, PNorm = 145.2231, GNorm = 0.3618, lr_0 = 5.8631e-04
Loss = 2.0627e-02, PNorm = 145.2646, GNorm = 0.2958, lr_0 = 5.8591e-04
Loss = 2.2743e-02, PNorm = 145.3041, GNorm = 0.2584, lr_0 = 5.8550e-04
Loss = 2.5042e-02, PNorm = 145.3538, GNorm = 0.5717, lr_0 = 5.8510e-04
Loss = 2.1394e-02, PNorm = 145.4013, GNorm = 0.2724, lr_0 = 5.8470e-04
Loss = 2.5395e-02, PNorm = 145.4543, GNorm = 0.5666, lr_0 = 5.8430e-04
Loss = 2.2457e-02, PNorm = 145.5070, GNorm = 0.5781, lr_0 = 5.8390e-04
Loss = 2.1084e-02, PNorm = 145.5598, GNorm = 0.2378, lr_0 = 5.8350e-04
Loss = 2.4334e-02, PNorm = 145.6090, GNorm = 0.7306, lr_0 = 5.8310e-04
Loss = 2.3220e-02, PNorm = 145.6579, GNorm = 0.1493, lr_0 = 5.8270e-04
Loss = 2.7726e-02, PNorm = 145.7098, GNorm = 0.2212, lr_0 = 5.8230e-04
Loss = 2.2396e-02, PNorm = 145.7567, GNorm = 0.2073, lr_0 = 5.8190e-04
Loss = 2.2332e-02, PNorm = 145.8021, GNorm = 0.3538, lr_0 = 5.8151e-04
Loss = 2.1737e-02, PNorm = 145.8480, GNorm = 0.2067, lr_0 = 5.8111e-04
Loss = 1.9548e-02, PNorm = 145.8954, GNorm = 0.1847, lr_0 = 5.8071e-04
Loss = 2.2633e-02, PNorm = 145.9420, GNorm = 0.5743, lr_0 = 5.8031e-04
Loss = 2.0579e-02, PNorm = 145.9884, GNorm = 0.9174, lr_0 = 5.7991e-04
Loss = 2.4161e-02, PNorm = 146.0367, GNorm = 0.4599, lr_0 = 5.7952e-04
Loss = 2.6883e-02, PNorm = 146.0810, GNorm = 0.3831, lr_0 = 5.7912e-04
Loss = 1.8327e-02, PNorm = 146.1298, GNorm = 0.1819, lr_0 = 5.7872e-04
Loss = 2.2810e-02, PNorm = 146.1768, GNorm = 0.3403, lr_0 = 5.7833e-04
Loss = 1.9938e-02, PNorm = 146.2236, GNorm = 0.2971, lr_0 = 5.7793e-04
Loss = 2.1637e-02, PNorm = 146.2690, GNorm = 0.8058, lr_0 = 5.7753e-04
Loss = 2.2643e-02, PNorm = 146.3144, GNorm = 0.3975, lr_0 = 5.7714e-04
Loss = 2.2618e-02, PNorm = 146.3563, GNorm = 0.1790, lr_0 = 5.7674e-04
Loss = 2.5990e-02, PNorm = 146.4065, GNorm = 0.2996, lr_0 = 5.7635e-04
Loss = 2.5806e-02, PNorm = 146.4508, GNorm = 0.1971, lr_0 = 5.7595e-04
Loss = 2.0319e-02, PNorm = 146.5015, GNorm = 0.5009, lr_0 = 5.7556e-04
Loss = 1.9666e-02, PNorm = 146.5497, GNorm = 0.1968, lr_0 = 5.7516e-04
Loss = 2.2964e-02, PNorm = 146.6060, GNorm = 0.9204, lr_0 = 5.7477e-04
Loss = 2.1279e-02, PNorm = 146.6508, GNorm = 0.6322, lr_0 = 5.7438e-04
Loss = 1.8035e-02, PNorm = 146.7001, GNorm = 0.4373, lr_0 = 5.7398e-04
Loss = 2.2889e-02, PNorm = 146.7422, GNorm = 0.4282, lr_0 = 5.7359e-04
Loss = 2.4370e-02, PNorm = 146.7894, GNorm = 0.2218, lr_0 = 5.7320e-04
Loss = 2.1344e-02, PNorm = 146.8340, GNorm = 0.8919, lr_0 = 5.7280e-04
Loss = 2.0477e-02, PNorm = 146.8871, GNorm = 0.3828, lr_0 = 5.7241e-04
Loss = 2.2578e-02, PNorm = 146.9316, GNorm = 0.4370, lr_0 = 5.7202e-04
Loss = 2.0587e-02, PNorm = 146.9823, GNorm = 0.3295, lr_0 = 5.7163e-04
Loss = 2.1944e-02, PNorm = 147.0271, GNorm = 0.2472, lr_0 = 5.7124e-04
Loss = 1.9620e-02, PNorm = 147.0713, GNorm = 0.2238, lr_0 = 5.7084e-04
Loss = 2.3555e-02, PNorm = 147.1181, GNorm = 0.2317, lr_0 = 5.7045e-04
Loss = 1.8394e-02, PNorm = 147.1683, GNorm = 0.5462, lr_0 = 5.7006e-04
Loss = 1.9210e-02, PNorm = 147.2163, GNorm = 0.2200, lr_0 = 5.6967e-04
Loss = 2.3975e-02, PNorm = 147.2655, GNorm = 0.3878, lr_0 = 5.6928e-04
Loss = 2.4042e-02, PNorm = 147.3179, GNorm = 0.3787, lr_0 = 5.6889e-04
Loss = 2.3884e-02, PNorm = 147.3700, GNorm = 0.6409, lr_0 = 5.6850e-04
Loss = 2.0766e-02, PNorm = 147.4198, GNorm = 0.2029, lr_0 = 5.6811e-04
Loss = 2.2241e-02, PNorm = 147.4636, GNorm = 0.2580, lr_0 = 5.6772e-04
Loss = 2.3311e-02, PNorm = 147.5126, GNorm = 0.3649, lr_0 = 5.6733e-04
Loss = 2.3407e-02, PNorm = 147.5633, GNorm = 0.3120, lr_0 = 5.6695e-04
Loss = 2.5926e-02, PNorm = 147.6149, GNorm = 0.1977, lr_0 = 5.6656e-04
Loss = 2.2108e-02, PNorm = 147.6657, GNorm = 0.8947, lr_0 = 5.6617e-04
Loss = 1.9385e-02, PNorm = 147.7141, GNorm = 0.5017, lr_0 = 5.6578e-04
Loss = 2.1842e-02, PNorm = 147.7615, GNorm = 0.2036, lr_0 = 5.6539e-04
Loss = 2.1716e-02, PNorm = 147.8116, GNorm = 0.2402, lr_0 = 5.6501e-04
Loss = 2.1362e-02, PNorm = 147.8659, GNorm = 0.1402, lr_0 = 5.6462e-04
Loss = 2.3524e-02, PNorm = 147.9220, GNorm = 0.2894, lr_0 = 5.6423e-04
Loss = 2.5674e-02, PNorm = 147.9748, GNorm = 0.3579, lr_0 = 5.6385e-04
Loss = 2.6307e-02, PNorm = 148.0325, GNorm = 0.2734, lr_0 = 5.6346e-04
Loss = 2.3986e-02, PNorm = 148.0930, GNorm = 0.3587, lr_0 = 5.6307e-04
Loss = 2.4802e-02, PNorm = 148.1459, GNorm = 0.3969, lr_0 = 5.6269e-04
Loss = 2.9321e-02, PNorm = 148.1964, GNorm = 0.2058, lr_0 = 5.6230e-04
Validation mae = 0.285195
Epoch 9
Loss = 2.3065e-02, PNorm = 148.2389, GNorm = 0.8726, lr_0 = 5.6192e-04
Loss = 2.3640e-02, PNorm = 148.2802, GNorm = 0.5295, lr_0 = 5.6153e-04
Loss = 1.5236e-02, PNorm = 148.3216, GNorm = 0.2471, lr_0 = 5.6115e-04
Loss = 2.1967e-02, PNorm = 148.3595, GNorm = 0.4521, lr_0 = 5.6076e-04
Loss = 1.9554e-02, PNorm = 148.3947, GNorm = 0.4616, lr_0 = 5.6038e-04
Loss = 2.0032e-02, PNorm = 148.4294, GNorm = 0.9298, lr_0 = 5.6000e-04
Loss = 1.7053e-02, PNorm = 148.4712, GNorm = 0.5232, lr_0 = 5.5961e-04
Loss = 1.6740e-02, PNorm = 148.5050, GNorm = 0.2584, lr_0 = 5.5923e-04
Loss = 1.7542e-02, PNorm = 148.5416, GNorm = 0.2893, lr_0 = 5.5885e-04
Loss = 1.6936e-02, PNorm = 148.5752, GNorm = 0.5194, lr_0 = 5.5846e-04
Loss = 2.4275e-02, PNorm = 148.6060, GNorm = 0.6271, lr_0 = 5.5808e-04
Loss = 1.7933e-02, PNorm = 148.6396, GNorm = 0.3018, lr_0 = 5.5770e-04
Loss = 2.0719e-02, PNorm = 148.6748, GNorm = 1.3225, lr_0 = 5.5732e-04
Loss = 1.6282e-02, PNorm = 148.7065, GNorm = 0.3668, lr_0 = 5.5693e-04
Loss = 1.6644e-02, PNorm = 148.7395, GNorm = 0.3468, lr_0 = 5.5655e-04
Loss = 2.2717e-02, PNorm = 148.7711, GNorm = 0.7980, lr_0 = 5.5617e-04
Loss = 1.8983e-02, PNorm = 148.8086, GNorm = 0.3938, lr_0 = 5.5579e-04
Loss = 1.8841e-02, PNorm = 148.8361, GNorm = 1.0603, lr_0 = 5.5541e-04
Loss = 1.7729e-02, PNorm = 148.8736, GNorm = 0.2612, lr_0 = 5.5503e-04
Loss = 1.5452e-02, PNorm = 148.9085, GNorm = 0.3824, lr_0 = 5.5465e-04
Loss = 1.8303e-02, PNorm = 148.9488, GNorm = 0.7166, lr_0 = 5.5427e-04
Loss = 1.7087e-02, PNorm = 148.9829, GNorm = 0.4115, lr_0 = 5.5389e-04
Loss = 1.9258e-02, PNorm = 149.0117, GNorm = 0.7244, lr_0 = 5.5351e-04
Loss = 2.0335e-02, PNorm = 149.0449, GNorm = 0.4191, lr_0 = 5.5313e-04
Loss = 1.8104e-02, PNorm = 149.0763, GNorm = 0.2667, lr_0 = 5.5275e-04
Loss = 1.9455e-02, PNorm = 149.1137, GNorm = 0.1732, lr_0 = 5.5237e-04
Loss = 1.8303e-02, PNorm = 149.1553, GNorm = 0.3180, lr_0 = 5.5199e-04
Loss = 1.6574e-02, PNorm = 149.1974, GNorm = 0.3689, lr_0 = 5.5162e-04
Loss = 1.5335e-02, PNorm = 149.2343, GNorm = 0.2237, lr_0 = 5.5124e-04
Loss = 1.6773e-02, PNorm = 149.2674, GNorm = 0.4733, lr_0 = 5.5086e-04
Loss = 1.4722e-02, PNorm = 149.2991, GNorm = 0.1422, lr_0 = 5.5048e-04
Loss = 1.4644e-02, PNorm = 149.3302, GNorm = 0.1039, lr_0 = 5.5011e-04
Loss = 1.8713e-02, PNorm = 149.3621, GNorm = 0.2260, lr_0 = 5.4973e-04
Loss = 1.7018e-02, PNorm = 149.3972, GNorm = 0.4729, lr_0 = 5.4935e-04
Loss = 2.0764e-02, PNorm = 149.4359, GNorm = 0.3637, lr_0 = 5.4898e-04
Loss = 2.0908e-02, PNorm = 149.4745, GNorm = 0.7862, lr_0 = 5.4860e-04
Loss = 1.9731e-02, PNorm = 149.5133, GNorm = 0.6187, lr_0 = 5.4822e-04
Loss = 1.9840e-02, PNorm = 149.5463, GNorm = 0.5416, lr_0 = 5.4785e-04
Loss = 1.8688e-02, PNorm = 149.5834, GNorm = 0.1841, lr_0 = 5.4747e-04
Loss = 2.0428e-02, PNorm = 149.6188, GNorm = 0.4498, lr_0 = 5.4710e-04
Loss = 1.5847e-02, PNorm = 149.6573, GNorm = 0.5469, lr_0 = 5.4672e-04
Loss = 1.7941e-02, PNorm = 149.6940, GNorm = 0.1982, lr_0 = 5.4635e-04
Loss = 1.9670e-02, PNorm = 149.7364, GNorm = 0.2634, lr_0 = 5.4597e-04
Loss = 1.7751e-02, PNorm = 149.7717, GNorm = 0.3838, lr_0 = 5.4560e-04
Loss = 1.6665e-02, PNorm = 149.8052, GNorm = 0.3554, lr_0 = 5.4523e-04
Loss = 1.6411e-02, PNorm = 149.8353, GNorm = 0.4704, lr_0 = 5.4485e-04
Loss = 1.6622e-02, PNorm = 149.8676, GNorm = 0.3441, lr_0 = 5.4448e-04
Loss = 1.5648e-02, PNorm = 149.9018, GNorm = 0.3428, lr_0 = 5.4411e-04
Loss = 1.5917e-02, PNorm = 149.9345, GNorm = 0.2373, lr_0 = 5.4373e-04
Loss = 1.7421e-02, PNorm = 149.9697, GNorm = 0.3673, lr_0 = 5.4336e-04
Loss = 1.7873e-02, PNorm = 150.0045, GNorm = 0.4756, lr_0 = 5.4299e-04
Loss = 1.9959e-02, PNorm = 150.0451, GNorm = 0.4631, lr_0 = 5.4262e-04
Loss = 1.6391e-02, PNorm = 150.0859, GNorm = 0.6108, lr_0 = 5.4225e-04
Loss = 1.7519e-02, PNorm = 150.1247, GNorm = 0.4947, lr_0 = 5.4187e-04
Loss = 1.7510e-02, PNorm = 150.1592, GNorm = 0.1413, lr_0 = 5.4150e-04
Loss = 1.5289e-02, PNorm = 150.1953, GNorm = 0.6982, lr_0 = 5.4113e-04
Loss = 1.6596e-02, PNorm = 150.2308, GNorm = 0.4055, lr_0 = 5.4076e-04
Loss = 1.8604e-02, PNorm = 150.2721, GNorm = 0.3100, lr_0 = 5.4039e-04
Loss = 1.7253e-02, PNorm = 150.3145, GNorm = 0.7776, lr_0 = 5.4002e-04
Loss = 1.6826e-02, PNorm = 150.3552, GNorm = 0.2337, lr_0 = 5.3965e-04
Loss = 1.7529e-02, PNorm = 150.3931, GNorm = 0.4175, lr_0 = 5.3928e-04
Loss = 1.8323e-02, PNorm = 150.4357, GNorm = 0.9142, lr_0 = 5.3891e-04
Loss = 1.8501e-02, PNorm = 150.4672, GNorm = 0.2975, lr_0 = 5.3854e-04
Loss = 1.6191e-02, PNorm = 150.5070, GNorm = 0.1456, lr_0 = 5.3817e-04
Loss = 1.8600e-02, PNorm = 150.5471, GNorm = 0.8351, lr_0 = 5.3781e-04
Loss = 2.6690e-02, PNorm = 150.5826, GNorm = 0.1477, lr_0 = 5.3744e-04
Loss = 2.1412e-02, PNorm = 150.6221, GNorm = 0.3142, lr_0 = 5.3707e-04
Loss = 2.2293e-02, PNorm = 150.6609, GNorm = 0.2006, lr_0 = 5.3670e-04
Loss = 1.5899e-02, PNorm = 150.7060, GNorm = 0.2627, lr_0 = 5.3633e-04
Loss = 1.7208e-02, PNorm = 150.7490, GNorm = 0.2582, lr_0 = 5.3597e-04
Loss = 1.5141e-02, PNorm = 150.7896, GNorm = 0.2088, lr_0 = 5.3560e-04
Loss = 1.5537e-02, PNorm = 150.8296, GNorm = 0.2630, lr_0 = 5.3523e-04
Loss = 1.8780e-02, PNorm = 150.8692, GNorm = 0.3351, lr_0 = 5.3486e-04
Loss = 1.7503e-02, PNorm = 150.9010, GNorm = 0.8283, lr_0 = 5.3450e-04
Loss = 1.9396e-02, PNorm = 150.9394, GNorm = 0.2702, lr_0 = 5.3413e-04
Loss = 1.6287e-02, PNorm = 150.9746, GNorm = 0.2605, lr_0 = 5.3377e-04
Loss = 1.6954e-02, PNorm = 151.0107, GNorm = 0.3049, lr_0 = 5.3340e-04
Loss = 1.8295e-02, PNorm = 151.0478, GNorm = 0.3194, lr_0 = 5.3304e-04
Loss = 1.9741e-02, PNorm = 151.0911, GNorm = 0.2836, lr_0 = 5.3267e-04
Loss = 1.7603e-02, PNorm = 151.1259, GNorm = 0.3167, lr_0 = 5.3231e-04
Loss = 2.0913e-02, PNorm = 151.1653, GNorm = 0.3798, lr_0 = 5.3194e-04
Loss = 1.8697e-02, PNorm = 151.2003, GNorm = 0.2110, lr_0 = 5.3158e-04
Loss = 1.3606e-02, PNorm = 151.2374, GNorm = 0.3078, lr_0 = 5.3121e-04
Loss = 1.6697e-02, PNorm = 151.2752, GNorm = 0.2552, lr_0 = 5.3085e-04
Loss = 1.8798e-02, PNorm = 151.3130, GNorm = 0.2013, lr_0 = 5.3048e-04
Loss = 1.8476e-02, PNorm = 151.3502, GNorm = 0.2184, lr_0 = 5.3012e-04
Loss = 1.9091e-02, PNorm = 151.3887, GNorm = 0.4062, lr_0 = 5.2976e-04
Loss = 1.7607e-02, PNorm = 151.4324, GNorm = 0.2706, lr_0 = 5.2939e-04
Loss = 1.4791e-02, PNorm = 151.4734, GNorm = 0.3410, lr_0 = 5.2903e-04
Loss = 1.7439e-02, PNorm = 151.5085, GNorm = 0.3007, lr_0 = 5.2867e-04
Loss = 1.7585e-02, PNorm = 151.5481, GNorm = 0.2919, lr_0 = 5.2831e-04
Loss = 1.7920e-02, PNorm = 151.5904, GNorm = 0.2988, lr_0 = 5.2795e-04
Loss = 1.9191e-02, PNorm = 151.6316, GNorm = 0.1274, lr_0 = 5.2758e-04
Loss = 1.8405e-02, PNorm = 151.6718, GNorm = 0.4388, lr_0 = 5.2722e-04
Loss = 1.6516e-02, PNorm = 151.7160, GNorm = 0.3454, lr_0 = 5.2686e-04
Loss = 1.8256e-02, PNorm = 151.7583, GNorm = 0.5159, lr_0 = 5.2650e-04
Loss = 1.6688e-02, PNorm = 151.8007, GNorm = 0.3889, lr_0 = 5.2614e-04
Loss = 2.0127e-02, PNorm = 151.8354, GNorm = 0.4218, lr_0 = 5.2578e-04
Loss = 2.0146e-02, PNorm = 151.8822, GNorm = 0.2946, lr_0 = 5.2542e-04
Loss = 1.7415e-02, PNorm = 151.9263, GNorm = 0.3357, lr_0 = 5.2506e-04
Loss = 1.6276e-02, PNorm = 151.9629, GNorm = 0.5164, lr_0 = 5.2470e-04
Loss = 1.8199e-02, PNorm = 152.0006, GNorm = 0.2323, lr_0 = 5.2434e-04
Loss = 1.6554e-02, PNorm = 152.0401, GNorm = 0.3604, lr_0 = 5.2398e-04
Loss = 1.8540e-02, PNorm = 152.0821, GNorm = 0.3017, lr_0 = 5.2362e-04
Loss = 1.6157e-02, PNorm = 152.1153, GNorm = 0.5606, lr_0 = 5.2326e-04
Loss = 2.2210e-02, PNorm = 152.1526, GNorm = 0.5584, lr_0 = 5.2290e-04
Loss = 1.6505e-02, PNorm = 152.1929, GNorm = 0.2596, lr_0 = 5.2255e-04
Loss = 1.8486e-02, PNorm = 152.2329, GNorm = 0.2668, lr_0 = 5.2219e-04
Loss = 1.6866e-02, PNorm = 152.2736, GNorm = 0.8373, lr_0 = 5.2183e-04
Loss = 1.9308e-02, PNorm = 152.3152, GNorm = 0.5192, lr_0 = 5.2147e-04
Loss = 1.4900e-02, PNorm = 152.3570, GNorm = 0.2760, lr_0 = 5.2112e-04
Loss = 1.6315e-02, PNorm = 152.3954, GNorm = 0.3292, lr_0 = 5.2076e-04
Loss = 1.9322e-02, PNorm = 152.4307, GNorm = 0.2243, lr_0 = 5.2040e-04
Loss = 1.7670e-02, PNorm = 152.4680, GNorm = 0.3609, lr_0 = 5.2005e-04
Loss = 1.6673e-02, PNorm = 152.5074, GNorm = 0.9988, lr_0 = 5.1969e-04
Loss = 1.8043e-02, PNorm = 152.5486, GNorm = 0.2904, lr_0 = 5.1933e-04
Loss = 1.6805e-02, PNorm = 152.5896, GNorm = 0.4042, lr_0 = 5.1898e-04
Loss = 1.7700e-02, PNorm = 152.6329, GNorm = 0.2688, lr_0 = 5.1862e-04
Loss = 1.6357e-02, PNorm = 152.6757, GNorm = 0.3951, lr_0 = 5.1827e-04
Loss = 1.8501e-02, PNorm = 152.7144, GNorm = 0.3478, lr_0 = 5.1791e-04
Validation mae = 0.283231
Epoch 10
Loss = 1.4805e-02, PNorm = 152.7516, GNorm = 0.2944, lr_0 = 5.1756e-04
Loss = 1.7761e-02, PNorm = 152.7807, GNorm = 0.3166, lr_0 = 5.1720e-04
Loss = 1.3242e-02, PNorm = 152.8078, GNorm = 0.4144, lr_0 = 5.1685e-04
Loss = 1.5140e-02, PNorm = 152.8333, GNorm = 0.2225, lr_0 = 5.1649e-04
Loss = 1.3798e-02, PNorm = 152.8598, GNorm = 0.2860, lr_0 = 5.1614e-04
Loss = 1.5609e-02, PNorm = 152.8830, GNorm = 0.4149, lr_0 = 5.1579e-04
Loss = 1.4962e-02, PNorm = 152.9109, GNorm = 0.6825, lr_0 = 5.1543e-04
Loss = 1.3107e-02, PNorm = 152.9411, GNorm = 0.3365, lr_0 = 5.1508e-04
Loss = 1.4315e-02, PNorm = 152.9673, GNorm = 0.2464, lr_0 = 5.1473e-04
Loss = 1.2912e-02, PNorm = 152.9887, GNorm = 0.3145, lr_0 = 5.1437e-04
Loss = 1.2922e-02, PNorm = 153.0111, GNorm = 0.1714, lr_0 = 5.1402e-04
Loss = 1.3678e-02, PNorm = 153.0334, GNorm = 0.2876, lr_0 = 5.1367e-04
Loss = 1.7197e-02, PNorm = 153.0628, GNorm = 0.5981, lr_0 = 5.1332e-04
Loss = 1.3748e-02, PNorm = 153.0920, GNorm = 0.5274, lr_0 = 5.1297e-04
Loss = 1.5196e-02, PNorm = 153.1222, GNorm = 0.2226, lr_0 = 5.1262e-04
Loss = 1.3427e-02, PNorm = 153.1481, GNorm = 0.1431, lr_0 = 5.1226e-04
Loss = 1.5382e-02, PNorm = 153.1800, GNorm = 0.2986, lr_0 = 5.1191e-04
Loss = 1.2713e-02, PNorm = 153.2069, GNorm = 0.3801, lr_0 = 5.1156e-04
Loss = 1.4472e-02, PNorm = 153.2330, GNorm = 0.3884, lr_0 = 5.1121e-04
Loss = 1.5917e-02, PNorm = 153.2601, GNorm = 0.2015, lr_0 = 5.1086e-04
Loss = 1.4449e-02, PNorm = 153.2917, GNorm = 0.5242, lr_0 = 5.1051e-04
Loss = 1.2397e-02, PNorm = 153.3190, GNorm = 0.1564, lr_0 = 5.1016e-04
Loss = 1.6550e-02, PNorm = 153.3509, GNorm = 0.8006, lr_0 = 5.0981e-04
Loss = 1.5721e-02, PNorm = 153.3780, GNorm = 0.7775, lr_0 = 5.0946e-04
Loss = 1.2605e-02, PNorm = 153.4116, GNorm = 0.1963, lr_0 = 5.0911e-04
Loss = 1.5334e-02, PNorm = 153.4363, GNorm = 0.4155, lr_0 = 5.0877e-04
Loss = 1.5380e-02, PNorm = 153.4654, GNorm = 0.1976, lr_0 = 5.0842e-04
Loss = 1.4117e-02, PNorm = 153.4916, GNorm = 0.4020, lr_0 = 5.0807e-04
Loss = 1.7274e-02, PNorm = 153.5204, GNorm = 0.3522, lr_0 = 5.0772e-04
Loss = 1.1447e-02, PNorm = 153.5492, GNorm = 0.1578, lr_0 = 5.0737e-04
Loss = 1.5345e-02, PNorm = 153.5760, GNorm = 0.3068, lr_0 = 5.0703e-04
Loss = 1.4102e-02, PNorm = 153.5998, GNorm = 0.2235, lr_0 = 5.0668e-04
Loss = 1.3857e-02, PNorm = 153.6270, GNorm = 0.3300, lr_0 = 5.0633e-04
Loss = 1.2809e-02, PNorm = 153.6519, GNorm = 0.3141, lr_0 = 5.0598e-04
Loss = 1.3680e-02, PNorm = 153.6771, GNorm = 0.8777, lr_0 = 5.0564e-04
Loss = 1.4916e-02, PNorm = 153.7050, GNorm = 0.3762, lr_0 = 5.0529e-04
Loss = 1.3316e-02, PNorm = 153.7348, GNorm = 0.1098, lr_0 = 5.0494e-04
Loss = 1.4618e-02, PNorm = 153.7638, GNorm = 0.4592, lr_0 = 5.0460e-04
Loss = 1.7797e-02, PNorm = 153.7958, GNorm = 0.3440, lr_0 = 5.0425e-04
Loss = 1.2536e-02, PNorm = 153.8281, GNorm = 0.4141, lr_0 = 5.0391e-04
Loss = 1.1506e-02, PNorm = 153.8583, GNorm = 0.3337, lr_0 = 5.0356e-04
Loss = 1.3685e-02, PNorm = 153.8834, GNorm = 0.1293, lr_0 = 5.0322e-04
Loss = 1.2193e-02, PNorm = 153.9094, GNorm = 0.3135, lr_0 = 5.0287e-04
Loss = 1.4641e-02, PNorm = 153.9382, GNorm = 0.2511, lr_0 = 5.0253e-04
Loss = 1.3722e-02, PNorm = 153.9674, GNorm = 0.5176, lr_0 = 5.0218e-04
Loss = 1.2248e-02, PNorm = 153.9970, GNorm = 0.3309, lr_0 = 5.0184e-04
Loss = 1.5389e-02, PNorm = 154.0281, GNorm = 0.7565, lr_0 = 5.0150e-04
Loss = 1.2580e-02, PNorm = 154.0530, GNorm = 0.2404, lr_0 = 5.0115e-04
Loss = 1.3798e-02, PNorm = 154.0790, GNorm = 0.1892, lr_0 = 5.0081e-04
Loss = 1.5436e-02, PNorm = 154.1096, GNorm = 0.5637, lr_0 = 5.0047e-04
Loss = 1.4513e-02, PNorm = 154.1439, GNorm = 0.5131, lr_0 = 5.0012e-04
Loss = 1.4181e-02, PNorm = 154.1735, GNorm = 0.2481, lr_0 = 4.9978e-04
Loss = 1.4102e-02, PNorm = 154.2011, GNorm = 0.4390, lr_0 = 4.9944e-04
Loss = 1.3021e-02, PNorm = 154.2327, GNorm = 0.3111, lr_0 = 4.9910e-04
Loss = 1.3110e-02, PNorm = 154.2622, GNorm = 0.1925, lr_0 = 4.9875e-04
Loss = 1.3625e-02, PNorm = 154.2958, GNorm = 0.1658, lr_0 = 4.9841e-04
Loss = 1.4538e-02, PNorm = 154.3292, GNorm = 0.2593, lr_0 = 4.9807e-04
Loss = 1.2058e-02, PNorm = 154.3630, GNorm = 0.2105, lr_0 = 4.9773e-04
Loss = 1.3179e-02, PNorm = 154.3954, GNorm = 0.3302, lr_0 = 4.9739e-04
Loss = 1.3955e-02, PNorm = 154.4279, GNorm = 0.3349, lr_0 = 4.9705e-04
Loss = 1.3651e-02, PNorm = 154.4560, GNorm = 0.2700, lr_0 = 4.9671e-04
Loss = 1.3245e-02, PNorm = 154.4890, GNorm = 0.4544, lr_0 = 4.9637e-04
Loss = 1.7212e-02, PNorm = 154.5133, GNorm = 0.4607, lr_0 = 4.9603e-04
Loss = 1.2206e-02, PNorm = 154.5507, GNorm = 0.1832, lr_0 = 4.9569e-04
Loss = 1.5326e-02, PNorm = 154.5878, GNorm = 0.3108, lr_0 = 4.9535e-04
Loss = 1.2205e-02, PNorm = 154.6238, GNorm = 0.2313, lr_0 = 4.9501e-04
Loss = 1.2748e-02, PNorm = 154.6551, GNorm = 0.2709, lr_0 = 4.9467e-04
Loss = 1.2889e-02, PNorm = 154.6890, GNorm = 0.6135, lr_0 = 4.9433e-04
Loss = 1.3444e-02, PNorm = 154.7268, GNorm = 0.1694, lr_0 = 4.9399e-04
Loss = 1.2814e-02, PNorm = 154.7630, GNorm = 0.4196, lr_0 = 4.9365e-04
Loss = 1.7206e-02, PNorm = 154.7971, GNorm = 0.6660, lr_0 = 4.9332e-04
Loss = 1.6236e-02, PNorm = 154.8340, GNorm = 0.1628, lr_0 = 4.9298e-04
Loss = 1.7532e-02, PNorm = 154.8670, GNorm = 0.1896, lr_0 = 4.9264e-04
Loss = 1.4199e-02, PNorm = 154.9033, GNorm = 0.5942, lr_0 = 4.9230e-04
Loss = 1.3936e-02, PNorm = 154.9359, GNorm = 0.1328, lr_0 = 4.9197e-04
Loss = 1.4269e-02, PNorm = 154.9723, GNorm = 0.3893, lr_0 = 4.9163e-04
Loss = 1.3344e-02, PNorm = 155.0071, GNorm = 0.2677, lr_0 = 4.9129e-04
Loss = 1.9852e-02, PNorm = 155.0386, GNorm = 0.2386, lr_0 = 4.9095e-04
Loss = 1.2634e-02, PNorm = 155.0687, GNorm = 0.3444, lr_0 = 4.9062e-04
Loss = 1.1628e-02, PNorm = 155.1023, GNorm = 0.2760, lr_0 = 4.9028e-04
Loss = 1.4115e-02, PNorm = 155.1353, GNorm = 0.2715, lr_0 = 4.8995e-04
Loss = 1.6497e-02, PNorm = 155.1653, GNorm = 0.2334, lr_0 = 4.8961e-04
Loss = 1.3973e-02, PNorm = 155.1981, GNorm = 0.3650, lr_0 = 4.8928e-04
Loss = 1.7326e-02, PNorm = 155.2323, GNorm = 0.2095, lr_0 = 4.8894e-04
Loss = 1.3222e-02, PNorm = 155.2670, GNorm = 0.5619, lr_0 = 4.8861e-04
Loss = 1.1729e-02, PNorm = 155.2990, GNorm = 0.2828, lr_0 = 4.8827e-04
Loss = 1.3493e-02, PNorm = 155.3370, GNorm = 0.2473, lr_0 = 4.8794e-04
Loss = 1.8889e-02, PNorm = 155.3699, GNorm = 0.6774, lr_0 = 4.8760e-04
Loss = 1.3832e-02, PNorm = 155.4033, GNorm = 0.3313, lr_0 = 4.8727e-04
Loss = 1.3325e-02, PNorm = 155.4343, GNorm = 0.2333, lr_0 = 4.8693e-04
Loss = 1.2136e-02, PNorm = 155.4710, GNorm = 0.7457, lr_0 = 4.8660e-04
Loss = 1.4081e-02, PNorm = 155.5067, GNorm = 0.3090, lr_0 = 4.8627e-04
Loss = 1.6074e-02, PNorm = 155.5392, GNorm = 0.3141, lr_0 = 4.8593e-04
Loss = 1.3027e-02, PNorm = 155.5764, GNorm = 0.5809, lr_0 = 4.8560e-04
Loss = 1.3657e-02, PNorm = 155.6058, GNorm = 0.4320, lr_0 = 4.8527e-04
Loss = 1.3924e-02, PNorm = 155.6318, GNorm = 0.3404, lr_0 = 4.8494e-04
Loss = 1.2248e-02, PNorm = 155.6647, GNorm = 0.2733, lr_0 = 4.8460e-04
Loss = 1.7051e-02, PNorm = 155.6959, GNorm = 0.5985, lr_0 = 4.8427e-04
Loss = 1.0589e-02, PNorm = 155.7247, GNorm = 0.3724, lr_0 = 4.8394e-04
Loss = 1.3437e-02, PNorm = 155.7542, GNorm = 0.2022, lr_0 = 4.8361e-04
Loss = 1.3807e-02, PNorm = 155.7861, GNorm = 0.2604, lr_0 = 4.8328e-04
Loss = 1.3444e-02, PNorm = 155.8204, GNorm = 0.1892, lr_0 = 4.8295e-04
Loss = 1.4491e-02, PNorm = 155.8544, GNorm = 0.2561, lr_0 = 4.8262e-04
Loss = 1.4376e-02, PNorm = 155.8904, GNorm = 0.3059, lr_0 = 4.8228e-04
Loss = 1.4399e-02, PNorm = 155.9230, GNorm = 0.5289, lr_0 = 4.8195e-04
Loss = 1.5018e-02, PNorm = 155.9602, GNorm = 0.4120, lr_0 = 4.8162e-04
Loss = 1.3473e-02, PNorm = 155.9984, GNorm = 0.2685, lr_0 = 4.8129e-04
Loss = 1.9195e-02, PNorm = 156.0350, GNorm = 0.3048, lr_0 = 4.8096e-04
Loss = 1.6488e-02, PNorm = 156.0701, GNorm = 0.1679, lr_0 = 4.8064e-04
Loss = 1.4625e-02, PNorm = 156.1095, GNorm = 0.2134, lr_0 = 4.8031e-04
Loss = 1.6161e-02, PNorm = 156.1419, GNorm = 0.5562, lr_0 = 4.7998e-04
Loss = 1.3387e-02, PNorm = 156.1734, GNorm = 0.7088, lr_0 = 4.7965e-04
Loss = 1.6887e-02, PNorm = 156.2065, GNorm = 0.3329, lr_0 = 4.7932e-04
Loss = 1.6227e-02, PNorm = 156.2453, GNorm = 0.2394, lr_0 = 4.7899e-04
Loss = 2.2209e-02, PNorm = 156.2853, GNorm = 0.2628, lr_0 = 4.7866e-04
Loss = 1.3187e-02, PNorm = 156.3269, GNorm = 0.2946, lr_0 = 4.7833e-04
Loss = 1.3456e-02, PNorm = 156.3650, GNorm = 0.1956, lr_0 = 4.7801e-04
Loss = 1.4296e-02, PNorm = 156.3971, GNorm = 0.4204, lr_0 = 4.7768e-04
Loss = 1.3062e-02, PNorm = 156.4288, GNorm = 0.1692, lr_0 = 4.7735e-04
Loss = 1.4314e-02, PNorm = 156.4627, GNorm = 0.1909, lr_0 = 4.7703e-04
Validation mae = 0.282291
Epoch 11
Loss = 1.4351e-02, PNorm = 156.4899, GNorm = 0.3252, lr_0 = 4.7670e-04
Loss = 1.2903e-02, PNorm = 156.5167, GNorm = 0.1934, lr_0 = 4.7637e-04
Loss = 1.4088e-02, PNorm = 156.5417, GNorm = 0.3011, lr_0 = 4.7605e-04
Loss = 1.2235e-02, PNorm = 156.5672, GNorm = 0.5347, lr_0 = 4.7572e-04
Loss = 1.3137e-02, PNorm = 156.5956, GNorm = 0.2841, lr_0 = 4.7539e-04
Loss = 1.2017e-02, PNorm = 156.6196, GNorm = 0.2849, lr_0 = 4.7507e-04
Loss = 1.5086e-02, PNorm = 156.6427, GNorm = 0.1794, lr_0 = 4.7474e-04
Loss = 1.2525e-02, PNorm = 156.6659, GNorm = 0.4689, lr_0 = 4.7442e-04
Loss = 1.1312e-02, PNorm = 156.6883, GNorm = 0.3376, lr_0 = 4.7409e-04
Loss = 1.3129e-02, PNorm = 156.7044, GNorm = 0.6834, lr_0 = 4.7377e-04
Loss = 1.3468e-02, PNorm = 156.7271, GNorm = 0.6060, lr_0 = 4.7344e-04
Loss = 1.2984e-02, PNorm = 156.7515, GNorm = 0.2887, lr_0 = 4.7312e-04
Loss = 1.3122e-02, PNorm = 156.7781, GNorm = 0.5556, lr_0 = 4.7279e-04
Loss = 1.2259e-02, PNorm = 156.8048, GNorm = 0.1179, lr_0 = 4.7247e-04
Loss = 1.2162e-02, PNorm = 156.8294, GNorm = 0.3043, lr_0 = 4.7215e-04
Loss = 1.3242e-02, PNorm = 156.8536, GNorm = 0.2593, lr_0 = 4.7182e-04
Loss = 1.2590e-02, PNorm = 156.8758, GNorm = 0.5145, lr_0 = 4.7150e-04
Loss = 1.1863e-02, PNorm = 156.9001, GNorm = 0.1774, lr_0 = 4.7118e-04
Loss = 1.5199e-02, PNorm = 156.9219, GNorm = 0.2678, lr_0 = 4.7085e-04
Loss = 1.1571e-02, PNorm = 156.9507, GNorm = 0.3724, lr_0 = 4.7053e-04
Loss = 1.3799e-02, PNorm = 156.9800, GNorm = 0.3020, lr_0 = 4.7021e-04
Loss = 1.4076e-02, PNorm = 157.0127, GNorm = 0.3012, lr_0 = 4.6989e-04
Loss = 1.1977e-02, PNorm = 157.0401, GNorm = 0.2050, lr_0 = 4.6957e-04
Loss = 1.1933e-02, PNorm = 157.0636, GNorm = 0.3320, lr_0 = 4.6924e-04
Loss = 1.4167e-02, PNorm = 157.0872, GNorm = 0.4028, lr_0 = 4.6892e-04
Loss = 9.9755e-03, PNorm = 157.1140, GNorm = 0.1588, lr_0 = 4.6860e-04
Loss = 1.1479e-02, PNorm = 157.1396, GNorm = 0.8199, lr_0 = 4.6828e-04
Loss = 1.3019e-02, PNorm = 157.1628, GNorm = 0.5216, lr_0 = 4.6796e-04
Loss = 1.0559e-02, PNorm = 157.1839, GNorm = 0.3015, lr_0 = 4.6764e-04
Loss = 1.0463e-02, PNorm = 157.2062, GNorm = 0.4607, lr_0 = 4.6732e-04
Loss = 1.2232e-02, PNorm = 157.2292, GNorm = 0.1820, lr_0 = 4.6700e-04
Loss = 1.2139e-02, PNorm = 157.2546, GNorm = 0.3913, lr_0 = 4.6668e-04
Loss = 1.2433e-02, PNorm = 157.2788, GNorm = 0.9011, lr_0 = 4.6636e-04
Loss = 1.1100e-02, PNorm = 157.3033, GNorm = 0.1511, lr_0 = 4.6604e-04
Loss = 1.1551e-02, PNorm = 157.3291, GNorm = 0.8064, lr_0 = 4.6572e-04
Loss = 1.5501e-02, PNorm = 157.3547, GNorm = 0.2081, lr_0 = 4.6540e-04
Loss = 1.1116e-02, PNorm = 157.3811, GNorm = 0.4065, lr_0 = 4.6508e-04
Loss = 1.3720e-02, PNorm = 157.4093, GNorm = 0.2040, lr_0 = 4.6476e-04
Loss = 1.1949e-02, PNorm = 157.4346, GNorm = 0.3558, lr_0 = 4.6445e-04
Loss = 9.7865e-03, PNorm = 157.4554, GNorm = 0.3454, lr_0 = 4.6413e-04
Loss = 1.1557e-02, PNorm = 157.4759, GNorm = 0.5953, lr_0 = 4.6381e-04
Loss = 1.3066e-02, PNorm = 157.5010, GNorm = 0.3286, lr_0 = 4.6349e-04
Loss = 1.3947e-02, PNorm = 157.5257, GNorm = 0.4159, lr_0 = 4.6317e-04
Loss = 1.0617e-02, PNorm = 157.5503, GNorm = 0.3407, lr_0 = 4.6286e-04
Loss = 1.1907e-02, PNorm = 157.5786, GNorm = 0.3529, lr_0 = 4.6254e-04
Loss = 9.9076e-03, PNorm = 157.6106, GNorm = 0.2962, lr_0 = 4.6222e-04
Loss = 1.1966e-02, PNorm = 157.6413, GNorm = 0.2723, lr_0 = 4.6191e-04
Loss = 1.3322e-02, PNorm = 157.6667, GNorm = 0.5158, lr_0 = 4.6159e-04
Loss = 1.5056e-02, PNorm = 157.6928, GNorm = 0.2500, lr_0 = 4.6127e-04
Loss = 1.0622e-02, PNorm = 157.7197, GNorm = 0.3873, lr_0 = 4.6096e-04
Loss = 9.5058e-03, PNorm = 157.7471, GNorm = 0.4583, lr_0 = 4.6064e-04
Loss = 1.1111e-02, PNorm = 157.7743, GNorm = 0.1973, lr_0 = 4.6033e-04
Loss = 1.3200e-02, PNorm = 157.8002, GNorm = 0.4133, lr_0 = 4.6001e-04
Loss = 1.1548e-02, PNorm = 157.8201, GNorm = 0.1791, lr_0 = 4.5970e-04
Loss = 1.3446e-02, PNorm = 157.8429, GNorm = 0.1441, lr_0 = 4.5938e-04
Loss = 1.4680e-02, PNorm = 157.8664, GNorm = 0.1356, lr_0 = 4.5907e-04
Loss = 1.1600e-02, PNorm = 157.8903, GNorm = 0.2239, lr_0 = 4.5875e-04
Loss = 9.7923e-03, PNorm = 157.9172, GNorm = 0.1611, lr_0 = 4.5844e-04
Loss = 1.2524e-02, PNorm = 157.9451, GNorm = 0.0915, lr_0 = 4.5812e-04
Loss = 1.1689e-02, PNorm = 157.9701, GNorm = 0.1639, lr_0 = 4.5781e-04
Loss = 1.1534e-02, PNorm = 157.9936, GNorm = 0.2107, lr_0 = 4.5750e-04
Loss = 1.2395e-02, PNorm = 158.0221, GNorm = 0.3923, lr_0 = 4.5718e-04
Loss = 1.1718e-02, PNorm = 158.0512, GNorm = 0.2118, lr_0 = 4.5687e-04
Loss = 1.0778e-02, PNorm = 158.0780, GNorm = 0.4660, lr_0 = 4.5656e-04
Loss = 1.1451e-02, PNorm = 158.1052, GNorm = 0.1336, lr_0 = 4.5624e-04
Loss = 1.2652e-02, PNorm = 158.1305, GNorm = 0.2924, lr_0 = 4.5593e-04
Loss = 1.3543e-02, PNorm = 158.1572, GNorm = 0.2725, lr_0 = 4.5562e-04
Loss = 1.1119e-02, PNorm = 158.1886, GNorm = 0.1974, lr_0 = 4.5531e-04
Loss = 1.1435e-02, PNorm = 158.2168, GNorm = 0.2991, lr_0 = 4.5499e-04
Loss = 1.5069e-02, PNorm = 158.2421, GNorm = 0.7330, lr_0 = 4.5468e-04
Loss = 1.4476e-02, PNorm = 158.2712, GNorm = 0.1429, lr_0 = 4.5437e-04
Loss = 1.2474e-02, PNorm = 158.3008, GNorm = 0.2761, lr_0 = 4.5406e-04
Loss = 1.2596e-02, PNorm = 158.3322, GNorm = 0.2410, lr_0 = 4.5375e-04
Loss = 1.1334e-02, PNorm = 158.3590, GNorm = 0.5124, lr_0 = 4.5344e-04
Loss = 1.1018e-02, PNorm = 158.3845, GNorm = 0.1213, lr_0 = 4.5313e-04
Loss = 1.1891e-02, PNorm = 158.4096, GNorm = 0.4280, lr_0 = 4.5282e-04
Loss = 1.2896e-02, PNorm = 158.4304, GNorm = 0.3565, lr_0 = 4.5251e-04
Loss = 1.0308e-02, PNorm = 158.4596, GNorm = 0.1279, lr_0 = 4.5220e-04
Loss = 1.0919e-02, PNorm = 158.4907, GNorm = 0.2851, lr_0 = 4.5189e-04
Loss = 1.1351e-02, PNorm = 158.5240, GNorm = 0.2547, lr_0 = 4.5158e-04
Loss = 1.2285e-02, PNorm = 158.5518, GNorm = 0.2716, lr_0 = 4.5127e-04
Loss = 9.3910e-03, PNorm = 158.5773, GNorm = 0.2340, lr_0 = 4.5096e-04
Loss = 1.1588e-02, PNorm = 158.6005, GNorm = 0.0979, lr_0 = 4.5065e-04
Loss = 1.2951e-02, PNorm = 158.6288, GNorm = 0.1936, lr_0 = 4.5034e-04
Loss = 1.1937e-02, PNorm = 158.6566, GNorm = 0.1767, lr_0 = 4.5003e-04
Loss = 9.6742e-03, PNorm = 158.6853, GNorm = 0.1983, lr_0 = 4.4972e-04
Loss = 1.0725e-02, PNorm = 158.7129, GNorm = 0.1146, lr_0 = 4.4942e-04
Loss = 1.2983e-02, PNorm = 158.7389, GNorm = 0.1500, lr_0 = 4.4911e-04
Loss = 1.1311e-02, PNorm = 158.7676, GNorm = 0.2351, lr_0 = 4.4880e-04
Loss = 1.1622e-02, PNorm = 158.7926, GNorm = 0.2234, lr_0 = 4.4849e-04
Loss = 1.2542e-02, PNorm = 158.8208, GNorm = 0.4576, lr_0 = 4.4819e-04
Loss = 1.3037e-02, PNorm = 158.8508, GNorm = 0.2662, lr_0 = 4.4788e-04
Loss = 1.2025e-02, PNorm = 158.8790, GNorm = 0.4189, lr_0 = 4.4757e-04
Loss = 1.1651e-02, PNorm = 158.9081, GNorm = 0.1682, lr_0 = 4.4727e-04
Loss = 1.1912e-02, PNorm = 158.9353, GNorm = 0.6901, lr_0 = 4.4696e-04
Loss = 1.0439e-02, PNorm = 158.9669, GNorm = 0.2675, lr_0 = 4.4665e-04
Loss = 1.1927e-02, PNorm = 158.9951, GNorm = 0.3710, lr_0 = 4.4635e-04
Loss = 1.2757e-02, PNorm = 159.0212, GNorm = 0.3493, lr_0 = 4.4604e-04
Loss = 1.0551e-02, PNorm = 159.0465, GNorm = 0.5311, lr_0 = 4.4574e-04
Loss = 1.3367e-02, PNorm = 159.0722, GNorm = 0.3960, lr_0 = 4.4543e-04
Loss = 1.5476e-02, PNorm = 159.0993, GNorm = 0.2872, lr_0 = 4.4513e-04
Loss = 1.2078e-02, PNorm = 159.1275, GNorm = 0.2198, lr_0 = 4.4482e-04
Loss = 1.3846e-02, PNorm = 159.1550, GNorm = 0.2477, lr_0 = 4.4452e-04
Loss = 1.3667e-02, PNorm = 159.1804, GNorm = 0.3127, lr_0 = 4.4421e-04
Loss = 1.1550e-02, PNorm = 159.2050, GNorm = 0.3826, lr_0 = 4.4391e-04
Loss = 1.4303e-02, PNorm = 159.2283, GNorm = 0.2004, lr_0 = 4.4360e-04
Loss = 1.2312e-02, PNorm = 159.2532, GNorm = 0.4571, lr_0 = 4.4330e-04
Loss = 1.3386e-02, PNorm = 159.2820, GNorm = 0.1401, lr_0 = 4.4299e-04
Loss = 9.9902e-03, PNorm = 159.3157, GNorm = 0.2074, lr_0 = 4.4269e-04
Loss = 1.0595e-02, PNorm = 159.3463, GNorm = 0.3444, lr_0 = 4.4239e-04
Loss = 1.1185e-02, PNorm = 159.3725, GNorm = 0.5908, lr_0 = 4.4209e-04
Loss = 1.4271e-02, PNorm = 159.3973, GNorm = 0.5007, lr_0 = 4.4178e-04
Loss = 1.2769e-02, PNorm = 159.4279, GNorm = 0.7113, lr_0 = 4.4148e-04
Loss = 1.2155e-02, PNorm = 159.4612, GNorm = 0.3369, lr_0 = 4.4118e-04
Loss = 1.6637e-02, PNorm = 159.4938, GNorm = 0.1766, lr_0 = 4.4088e-04
Loss = 1.1568e-02, PNorm = 159.5231, GNorm = 0.1292, lr_0 = 4.4057e-04
Loss = 1.3216e-02, PNorm = 159.5567, GNorm = 0.2925, lr_0 = 4.4027e-04
Loss = 1.2101e-02, PNorm = 159.5897, GNorm = 0.4882, lr_0 = 4.3997e-04
Loss = 1.4875e-02, PNorm = 159.6207, GNorm = 0.2049, lr_0 = 4.3967e-04
Loss = 1.2528e-02, PNorm = 159.6511, GNorm = 0.7525, lr_0 = 4.3937e-04
Validation mae = 0.283025
Epoch 12
Loss = 1.1526e-02, PNorm = 159.6783, GNorm = 0.2550, lr_0 = 4.3907e-04
Loss = 1.1429e-02, PNorm = 159.7051, GNorm = 0.1782, lr_0 = 4.3877e-04
Loss = 1.1544e-02, PNorm = 159.7309, GNorm = 0.3369, lr_0 = 4.3846e-04
Loss = 1.0851e-02, PNorm = 159.7542, GNorm = 0.1790, lr_0 = 4.3816e-04
Loss = 1.1937e-02, PNorm = 159.7747, GNorm = 0.2083, lr_0 = 4.3786e-04
Loss = 1.0708e-02, PNorm = 159.7933, GNorm = 0.2463, lr_0 = 4.3756e-04
Loss = 1.1405e-02, PNorm = 159.8102, GNorm = 0.2658, lr_0 = 4.3726e-04
Loss = 1.0396e-02, PNorm = 159.8291, GNorm = 0.4352, lr_0 = 4.3696e-04
Loss = 9.8066e-03, PNorm = 159.8462, GNorm = 0.3197, lr_0 = 4.3667e-04
Loss = 1.0729e-02, PNorm = 159.8668, GNorm = 0.2508, lr_0 = 4.3637e-04
Loss = 1.1364e-02, PNorm = 159.8835, GNorm = 0.2549, lr_0 = 4.3607e-04
Loss = 1.1485e-02, PNorm = 159.9032, GNorm = 0.3867, lr_0 = 4.3577e-04
Loss = 1.0121e-02, PNorm = 159.9256, GNorm = 0.3672, lr_0 = 4.3547e-04
Loss = 9.6583e-03, PNorm = 159.9489, GNorm = 0.2777, lr_0 = 4.3517e-04
Loss = 1.0449e-02, PNorm = 159.9691, GNorm = 0.3658, lr_0 = 4.3487e-04
Loss = 9.3728e-03, PNorm = 159.9935, GNorm = 0.1530, lr_0 = 4.3458e-04
Loss = 1.2121e-02, PNorm = 160.0130, GNorm = 0.3278, lr_0 = 4.3428e-04
Loss = 1.0553e-02, PNorm = 160.0337, GNorm = 0.4008, lr_0 = 4.3398e-04
Loss = 1.0787e-02, PNorm = 160.0519, GNorm = 0.2244, lr_0 = 4.3368e-04
Loss = 9.8983e-03, PNorm = 160.0698, GNorm = 0.5868, lr_0 = 4.3339e-04
Loss = 9.3705e-03, PNorm = 160.0892, GNorm = 0.4167, lr_0 = 4.3309e-04
Loss = 1.2440e-02, PNorm = 160.1118, GNorm = 0.4956, lr_0 = 4.3279e-04
Loss = 9.7099e-03, PNorm = 160.1353, GNorm = 0.3020, lr_0 = 4.3250e-04
Loss = 1.0377e-02, PNorm = 160.1559, GNorm = 0.3495, lr_0 = 4.3220e-04
Loss = 8.4643e-03, PNorm = 160.1789, GNorm = 0.3178, lr_0 = 4.3190e-04
Loss = 1.1626e-02, PNorm = 160.1988, GNorm = 0.2088, lr_0 = 4.3161e-04
Loss = 1.0466e-02, PNorm = 160.2191, GNorm = 0.3515, lr_0 = 4.3131e-04
Loss = 8.2627e-03, PNorm = 160.2411, GNorm = 0.2740, lr_0 = 4.3102e-04
Loss = 1.0072e-02, PNorm = 160.2636, GNorm = 0.2498, lr_0 = 4.3072e-04
Loss = 9.2321e-03, PNorm = 160.2858, GNorm = 0.3424, lr_0 = 4.3043e-04
Loss = 1.0332e-02, PNorm = 160.3055, GNorm = 0.1700, lr_0 = 4.3013e-04
Loss = 1.0646e-02, PNorm = 160.3258, GNorm = 0.3681, lr_0 = 4.2984e-04
Loss = 9.2600e-03, PNorm = 160.3465, GNorm = 0.4121, lr_0 = 4.2954e-04
Loss = 7.5282e-03, PNorm = 160.3684, GNorm = 0.2918, lr_0 = 4.2925e-04
Loss = 1.0410e-02, PNorm = 160.3884, GNorm = 0.2385, lr_0 = 4.2895e-04
Loss = 9.0774e-03, PNorm = 160.4070, GNorm = 0.2971, lr_0 = 4.2866e-04
Loss = 9.7971e-03, PNorm = 160.4256, GNorm = 0.3793, lr_0 = 4.2837e-04
Loss = 1.0386e-02, PNorm = 160.4514, GNorm = 0.3239, lr_0 = 4.2807e-04
Loss = 1.1920e-02, PNorm = 160.4793, GNorm = 0.2213, lr_0 = 4.2778e-04
Loss = 9.9669e-03, PNorm = 160.5025, GNorm = 0.2849, lr_0 = 4.2749e-04
Loss = 7.9232e-03, PNorm = 160.5213, GNorm = 0.1755, lr_0 = 4.2719e-04
Loss = 8.6125e-03, PNorm = 160.5406, GNorm = 0.2207, lr_0 = 4.2690e-04
Loss = 9.1916e-03, PNorm = 160.5599, GNorm = 0.1688, lr_0 = 4.2661e-04
Loss = 1.1736e-02, PNorm = 160.5788, GNorm = 0.1448, lr_0 = 4.2632e-04
Loss = 9.2157e-03, PNorm = 160.5965, GNorm = 0.2916, lr_0 = 4.2602e-04
Loss = 9.7293e-03, PNorm = 160.6181, GNorm = 0.1741, lr_0 = 4.2573e-04
Loss = 9.2341e-03, PNorm = 160.6418, GNorm = 0.1999, lr_0 = 4.2544e-04
Loss = 9.8297e-03, PNorm = 160.6638, GNorm = 0.2608, lr_0 = 4.2515e-04
Loss = 9.9608e-03, PNorm = 160.6820, GNorm = 0.5546, lr_0 = 4.2486e-04
Loss = 1.0965e-02, PNorm = 160.7043, GNorm = 0.2109, lr_0 = 4.2457e-04
Loss = 8.5906e-03, PNorm = 160.7242, GNorm = 0.3807, lr_0 = 4.2428e-04
Loss = 8.8481e-03, PNorm = 160.7426, GNorm = 0.2094, lr_0 = 4.2399e-04
Loss = 9.0415e-03, PNorm = 160.7611, GNorm = 0.4613, lr_0 = 4.2370e-04
Loss = 9.5697e-03, PNorm = 160.7794, GNorm = 0.5439, lr_0 = 4.2340e-04
Loss = 1.0598e-02, PNorm = 160.8002, GNorm = 0.4135, lr_0 = 4.2311e-04
Loss = 1.0039e-02, PNorm = 160.8214, GNorm = 0.3817, lr_0 = 4.2283e-04
Loss = 1.0293e-02, PNorm = 160.8439, GNorm = 0.5197, lr_0 = 4.2254e-04
Loss = 8.5593e-03, PNorm = 160.8645, GNorm = 0.1183, lr_0 = 4.2225e-04
Loss = 9.3574e-03, PNorm = 160.8851, GNorm = 0.2529, lr_0 = 4.2196e-04
Loss = 1.1101e-02, PNorm = 160.9087, GNorm = 0.1580, lr_0 = 4.2167e-04
Loss = 9.8588e-03, PNorm = 160.9297, GNorm = 0.1730, lr_0 = 4.2138e-04
Loss = 9.9859e-03, PNorm = 160.9511, GNorm = 0.2835, lr_0 = 4.2109e-04
Loss = 1.1629e-02, PNorm = 160.9737, GNorm = 0.3981, lr_0 = 4.2080e-04
Loss = 1.2951e-02, PNorm = 160.9954, GNorm = 0.2278, lr_0 = 4.2051e-04
Loss = 9.0592e-03, PNorm = 161.0179, GNorm = 0.3330, lr_0 = 4.2023e-04
Loss = 1.0374e-02, PNorm = 161.0424, GNorm = 0.3723, lr_0 = 4.1994e-04
Loss = 9.6986e-03, PNorm = 161.0669, GNorm = 0.1042, lr_0 = 4.1965e-04
Loss = 9.5981e-03, PNorm = 161.0887, GNorm = 0.3397, lr_0 = 4.1936e-04
Loss = 1.0901e-02, PNorm = 161.1126, GNorm = 0.5226, lr_0 = 4.1907e-04
Loss = 1.0927e-02, PNorm = 161.1307, GNorm = 0.4166, lr_0 = 4.1879e-04
Loss = 1.1291e-02, PNorm = 161.1569, GNorm = 0.5987, lr_0 = 4.1850e-04
Loss = 9.9433e-03, PNorm = 161.1845, GNorm = 0.2511, lr_0 = 4.1821e-04
Loss = 1.0541e-02, PNorm = 161.2096, GNorm = 0.1401, lr_0 = 4.1793e-04
Loss = 1.3417e-02, PNorm = 161.2305, GNorm = 0.1006, lr_0 = 4.1764e-04
Loss = 1.0160e-02, PNorm = 161.2532, GNorm = 0.3913, lr_0 = 4.1736e-04
Loss = 1.4839e-02, PNorm = 161.2753, GNorm = 0.4014, lr_0 = 4.1707e-04
Loss = 9.3835e-03, PNorm = 161.3010, GNorm = 0.1951, lr_0 = 4.1678e-04
Loss = 1.1633e-02, PNorm = 161.3279, GNorm = 0.3222, lr_0 = 4.1650e-04
Loss = 1.0637e-02, PNorm = 161.3540, GNorm = 0.4660, lr_0 = 4.1621e-04
Loss = 9.5894e-03, PNorm = 161.3770, GNorm = 0.1291, lr_0 = 4.1593e-04
Loss = 9.9634e-03, PNorm = 161.3981, GNorm = 0.3429, lr_0 = 4.1564e-04
Loss = 1.1027e-02, PNorm = 161.4197, GNorm = 0.2280, lr_0 = 4.1536e-04
Loss = 9.5589e-03, PNorm = 161.4422, GNorm = 0.1845, lr_0 = 4.1507e-04
Loss = 1.2268e-02, PNorm = 161.4670, GNorm = 0.3282, lr_0 = 4.1479e-04
Loss = 8.2225e-03, PNorm = 161.4895, GNorm = 0.3103, lr_0 = 4.1450e-04
Loss = 1.1562e-02, PNorm = 161.5113, GNorm = 0.1155, lr_0 = 4.1422e-04
Loss = 1.5910e-02, PNorm = 161.5321, GNorm = 0.1821, lr_0 = 4.1394e-04
Loss = 1.0629e-02, PNorm = 161.5569, GNorm = 0.2407, lr_0 = 4.1365e-04
Loss = 1.1321e-02, PNorm = 161.5794, GNorm = 0.3770, lr_0 = 4.1337e-04
Loss = 1.2175e-02, PNorm = 161.6061, GNorm = 0.4368, lr_0 = 4.1309e-04
Loss = 1.1024e-02, PNorm = 161.6294, GNorm = 0.2695, lr_0 = 4.1280e-04
Loss = 1.0948e-02, PNorm = 161.6557, GNorm = 0.2542, lr_0 = 4.1252e-04
Loss = 8.2157e-03, PNorm = 161.6814, GNorm = 0.3312, lr_0 = 4.1224e-04
Loss = 1.0365e-02, PNorm = 161.7040, GNorm = 0.2654, lr_0 = 4.1196e-04
Loss = 8.4827e-03, PNorm = 161.7243, GNorm = 0.1677, lr_0 = 4.1167e-04
Loss = 9.8292e-03, PNorm = 161.7423, GNorm = 0.1286, lr_0 = 4.1139e-04
Loss = 1.3522e-02, PNorm = 161.7612, GNorm = 0.2754, lr_0 = 4.1111e-04
Loss = 8.4511e-03, PNorm = 161.7863, GNorm = 0.0959, lr_0 = 4.1083e-04
Loss = 9.9283e-03, PNorm = 161.8096, GNorm = 0.3399, lr_0 = 4.1055e-04
Loss = 1.1110e-02, PNorm = 161.8345, GNorm = 0.2943, lr_0 = 4.1027e-04
Loss = 1.1619e-02, PNorm = 161.8562, GNorm = 0.3606, lr_0 = 4.0998e-04
Loss = 8.7184e-03, PNorm = 161.8793, GNorm = 0.0903, lr_0 = 4.0970e-04
Loss = 9.7923e-03, PNorm = 161.9033, GNorm = 0.5020, lr_0 = 4.0942e-04
Loss = 9.5554e-03, PNorm = 161.9277, GNorm = 0.2940, lr_0 = 4.0914e-04
Loss = 1.0109e-02, PNorm = 161.9526, GNorm = 0.1777, lr_0 = 4.0886e-04
Loss = 9.3965e-03, PNorm = 161.9756, GNorm = 0.1396, lr_0 = 4.0858e-04
Loss = 8.7795e-03, PNorm = 161.9990, GNorm = 0.4664, lr_0 = 4.0830e-04
Loss = 1.0410e-02, PNorm = 162.0213, GNorm = 0.7966, lr_0 = 4.0802e-04
Loss = 1.2174e-02, PNorm = 162.0473, GNorm = 1.3057, lr_0 = 4.0774e-04
Loss = 1.1024e-02, PNorm = 162.0709, GNorm = 0.7638, lr_0 = 4.0746e-04
Loss = 1.2195e-02, PNorm = 162.0981, GNorm = 0.2328, lr_0 = 4.0718e-04
Loss = 1.0849e-02, PNorm = 162.1242, GNorm = 0.4822, lr_0 = 4.0691e-04
Loss = 9.8594e-03, PNorm = 162.1529, GNorm = 0.4547, lr_0 = 4.0663e-04
Loss = 1.1313e-02, PNorm = 162.1847, GNorm = 0.7324, lr_0 = 4.0635e-04
Loss = 1.0670e-02, PNorm = 162.2176, GNorm = 0.4472, lr_0 = 4.0607e-04
Loss = 9.1324e-03, PNorm = 162.2420, GNorm = 0.5370, lr_0 = 4.0579e-04
Loss = 1.4061e-02, PNorm = 162.2678, GNorm = 0.3363, lr_0 = 4.0551e-04
Loss = 9.3117e-03, PNorm = 162.2913, GNorm = 0.1353, lr_0 = 4.0524e-04
Loss = 1.1172e-02, PNorm = 162.3148, GNorm = 0.2749, lr_0 = 4.0496e-04
Loss = 8.2938e-03, PNorm = 162.3372, GNorm = 0.1251, lr_0 = 4.0468e-04
Validation mae = 0.281944
Epoch 13
Loss = 8.9964e-03, PNorm = 162.3547, GNorm = 0.3720, lr_0 = 4.0440e-04
Loss = 1.1848e-02, PNorm = 162.3724, GNorm = 0.1373, lr_0 = 4.0413e-04
Loss = 1.0657e-02, PNorm = 162.3894, GNorm = 0.2113, lr_0 = 4.0385e-04
Loss = 9.3367e-03, PNorm = 162.4079, GNorm = 0.2730, lr_0 = 4.0357e-04
Loss = 9.2709e-03, PNorm = 162.4255, GNorm = 0.2339, lr_0 = 4.0330e-04
Loss = 9.3453e-03, PNorm = 162.4417, GNorm = 0.2910, lr_0 = 4.0302e-04
Loss = 9.4272e-03, PNorm = 162.4585, GNorm = 0.5807, lr_0 = 4.0274e-04
Loss = 7.9051e-03, PNorm = 162.4729, GNorm = 0.6663, lr_0 = 4.0247e-04
Loss = 1.0121e-02, PNorm = 162.4911, GNorm = 0.1574, lr_0 = 4.0219e-04
Loss = 1.1123e-02, PNorm = 162.5056, GNorm = 0.2735, lr_0 = 4.0192e-04
Loss = 9.4208e-03, PNorm = 162.5196, GNorm = 0.2888, lr_0 = 4.0164e-04
Loss = 7.8167e-03, PNorm = 162.5331, GNorm = 0.1872, lr_0 = 4.0137e-04
Loss = 9.8646e-03, PNorm = 162.5482, GNorm = 0.1734, lr_0 = 4.0109e-04
Loss = 9.7839e-03, PNorm = 162.5694, GNorm = 0.3665, lr_0 = 4.0082e-04
Loss = 9.5882e-03, PNorm = 162.5824, GNorm = 0.4994, lr_0 = 4.0054e-04
Loss = 9.1072e-03, PNorm = 162.6019, GNorm = 0.0816, lr_0 = 4.0027e-04
Loss = 7.7801e-03, PNorm = 162.6172, GNorm = 0.6148, lr_0 = 3.9999e-04
Loss = 8.0024e-03, PNorm = 162.6343, GNorm = 0.2691, lr_0 = 3.9972e-04
Loss = 6.7858e-03, PNorm = 162.6456, GNorm = 0.3677, lr_0 = 3.9945e-04
Loss = 7.8220e-03, PNorm = 162.6606, GNorm = 0.1729, lr_0 = 3.9917e-04
Loss = 8.0929e-03, PNorm = 162.6730, GNorm = 0.2334, lr_0 = 3.9890e-04
Loss = 8.1224e-03, PNorm = 162.6877, GNorm = 0.1125, lr_0 = 3.9863e-04
Loss = 8.2783e-03, PNorm = 162.7084, GNorm = 0.2603, lr_0 = 3.9835e-04
Loss = 9.1934e-03, PNorm = 162.7281, GNorm = 0.2343, lr_0 = 3.9808e-04
Loss = 1.1184e-02, PNorm = 162.7460, GNorm = 0.2140, lr_0 = 3.9781e-04
Loss = 8.0448e-03, PNorm = 162.7625, GNorm = 0.1689, lr_0 = 3.9753e-04
Loss = 8.5860e-03, PNorm = 162.7785, GNorm = 0.1843, lr_0 = 3.9726e-04
Loss = 8.8157e-03, PNorm = 162.7983, GNorm = 0.1473, lr_0 = 3.9699e-04
Loss = 6.2598e-03, PNorm = 162.8178, GNorm = 0.1890, lr_0 = 3.9672e-04
Loss = 9.3304e-03, PNorm = 162.8374, GNorm = 0.2268, lr_0 = 3.9645e-04
Loss = 8.9379e-03, PNorm = 162.8546, GNorm = 0.2663, lr_0 = 3.9617e-04
Loss = 7.6958e-03, PNorm = 162.8766, GNorm = 0.3045, lr_0 = 3.9590e-04
Loss = 9.2879e-03, PNorm = 162.8947, GNorm = 0.2246, lr_0 = 3.9563e-04
Loss = 6.7039e-03, PNorm = 162.9108, GNorm = 0.1143, lr_0 = 3.9536e-04
Loss = 7.7089e-03, PNorm = 162.9307, GNorm = 0.1246, lr_0 = 3.9509e-04
Loss = 1.0182e-02, PNorm = 162.9505, GNorm = 0.1952, lr_0 = 3.9482e-04
Loss = 8.7479e-03, PNorm = 162.9683, GNorm = 0.1223, lr_0 = 3.9455e-04
Loss = 8.6368e-03, PNorm = 162.9849, GNorm = 0.2766, lr_0 = 3.9428e-04
Loss = 7.4798e-03, PNorm = 163.0026, GNorm = 0.2303, lr_0 = 3.9401e-04
Loss = 6.9323e-03, PNorm = 163.0209, GNorm = 0.3214, lr_0 = 3.9374e-04
Loss = 8.3711e-03, PNorm = 163.0349, GNorm = 0.2694, lr_0 = 3.9347e-04
Loss = 7.0969e-03, PNorm = 163.0513, GNorm = 0.4588, lr_0 = 3.9320e-04
Loss = 8.6920e-03, PNorm = 163.0648, GNorm = 0.3887, lr_0 = 3.9293e-04
Loss = 9.8058e-03, PNorm = 163.0850, GNorm = 0.4555, lr_0 = 3.9266e-04
Loss = 1.0338e-02, PNorm = 163.1029, GNorm = 0.4301, lr_0 = 3.9239e-04
Loss = 9.2960e-03, PNorm = 163.1201, GNorm = 0.5078, lr_0 = 3.9212e-04
Loss = 9.6751e-03, PNorm = 163.1395, GNorm = 0.3134, lr_0 = 3.9185e-04
Loss = 8.6408e-03, PNorm = 163.1619, GNorm = 0.1709, lr_0 = 3.9159e-04
Loss = 6.9269e-03, PNorm = 163.1822, GNorm = 0.6424, lr_0 = 3.9132e-04
Loss = 8.1426e-03, PNorm = 163.1999, GNorm = 0.2722, lr_0 = 3.9105e-04
Loss = 8.0756e-03, PNorm = 163.2196, GNorm = 0.2927, lr_0 = 3.9078e-04
Loss = 1.0281e-02, PNorm = 163.2397, GNorm = 0.3049, lr_0 = 3.9051e-04
Loss = 7.8771e-03, PNorm = 163.2572, GNorm = 0.2273, lr_0 = 3.9025e-04
Loss = 9.3298e-03, PNorm = 163.2736, GNorm = 0.1550, lr_0 = 3.8998e-04
Loss = 7.8220e-03, PNorm = 163.2914, GNorm = 0.4688, lr_0 = 3.8971e-04
Loss = 7.7559e-03, PNorm = 163.3108, GNorm = 0.1493, lr_0 = 3.8945e-04
Loss = 8.0399e-03, PNorm = 163.3283, GNorm = 0.2869, lr_0 = 3.8918e-04
Loss = 7.6702e-03, PNorm = 163.3447, GNorm = 0.3153, lr_0 = 3.8891e-04
Loss = 6.8870e-03, PNorm = 163.3618, GNorm = 0.1952, lr_0 = 3.8865e-04
Loss = 8.1087e-03, PNorm = 163.3815, GNorm = 0.1788, lr_0 = 3.8838e-04
Loss = 9.7744e-03, PNorm = 163.3997, GNorm = 0.1559, lr_0 = 3.8811e-04
Loss = 8.4071e-03, PNorm = 163.4217, GNorm = 0.2711, lr_0 = 3.8785e-04
Loss = 9.2790e-03, PNorm = 163.4365, GNorm = 0.2020, lr_0 = 3.8758e-04
Loss = 7.0833e-03, PNorm = 163.4516, GNorm = 0.1940, lr_0 = 3.8732e-04
Loss = 6.5031e-03, PNorm = 163.4684, GNorm = 0.2138, lr_0 = 3.8705e-04
Loss = 7.9096e-03, PNorm = 163.4850, GNorm = 0.2379, lr_0 = 3.8679e-04
Loss = 7.3369e-03, PNorm = 163.5027, GNorm = 0.2533, lr_0 = 3.8652e-04
Loss = 7.9684e-03, PNorm = 163.5202, GNorm = 0.4747, lr_0 = 3.8626e-04
Loss = 9.8790e-03, PNorm = 163.5417, GNorm = 0.1169, lr_0 = 3.8599e-04
Loss = 6.6876e-03, PNorm = 163.5617, GNorm = 0.2807, lr_0 = 3.8573e-04
Loss = 7.7548e-03, PNorm = 163.5814, GNorm = 0.1952, lr_0 = 3.8546e-04
Loss = 9.3978e-03, PNorm = 163.5995, GNorm = 0.1641, lr_0 = 3.8520e-04
Loss = 7.1834e-03, PNorm = 163.6154, GNorm = 0.2460, lr_0 = 3.8493e-04
Loss = 8.7049e-03, PNorm = 163.6317, GNorm = 0.1730, lr_0 = 3.8467e-04
Loss = 8.3713e-03, PNorm = 163.6513, GNorm = 0.2707, lr_0 = 3.8441e-04
Loss = 8.6563e-03, PNorm = 163.6702, GNorm = 0.4498, lr_0 = 3.8414e-04
Loss = 9.2936e-03, PNorm = 163.6907, GNorm = 0.2002, lr_0 = 3.8388e-04
Loss = 8.9631e-03, PNorm = 163.7109, GNorm = 0.1476, lr_0 = 3.8362e-04
Loss = 8.4644e-03, PNorm = 163.7308, GNorm = 0.1616, lr_0 = 3.8336e-04
Loss = 8.7592e-03, PNorm = 163.7505, GNorm = 0.4712, lr_0 = 3.8309e-04
Loss = 9.9204e-03, PNorm = 163.7737, GNorm = 0.1014, lr_0 = 3.8283e-04
Loss = 7.5904e-03, PNorm = 163.7885, GNorm = 0.5995, lr_0 = 3.8257e-04
Loss = 9.3455e-03, PNorm = 163.8042, GNorm = 0.4763, lr_0 = 3.8231e-04
Loss = 9.7541e-03, PNorm = 163.8228, GNorm = 0.0727, lr_0 = 3.8204e-04
Loss = 8.8180e-03, PNorm = 163.8416, GNorm = 0.1869, lr_0 = 3.8178e-04
Loss = 9.1655e-03, PNorm = 163.8645, GNorm = 0.3360, lr_0 = 3.8152e-04
Loss = 1.0079e-02, PNorm = 163.8853, GNorm = 0.1872, lr_0 = 3.8126e-04
Loss = 8.0322e-03, PNorm = 163.9045, GNorm = 0.1506, lr_0 = 3.8100e-04
Loss = 1.0093e-02, PNorm = 163.9229, GNorm = 0.5474, lr_0 = 3.8074e-04
Loss = 1.0061e-02, PNorm = 163.9404, GNorm = 0.2022, lr_0 = 3.8048e-04
Loss = 9.1495e-03, PNorm = 163.9594, GNorm = 0.2078, lr_0 = 3.8022e-04
Loss = 1.0297e-02, PNorm = 163.9824, GNorm = 0.6029, lr_0 = 3.7995e-04
Loss = 9.3339e-03, PNorm = 164.0050, GNorm = 0.3054, lr_0 = 3.7969e-04
Loss = 9.9714e-03, PNorm = 164.0265, GNorm = 0.1754, lr_0 = 3.7943e-04
Loss = 8.7635e-03, PNorm = 164.0458, GNorm = 0.2099, lr_0 = 3.7917e-04
Loss = 9.0129e-03, PNorm = 164.0630, GNorm = 0.3168, lr_0 = 3.7891e-04
Loss = 8.6874e-03, PNorm = 164.0843, GNorm = 0.2542, lr_0 = 3.7866e-04
Loss = 1.0762e-02, PNorm = 164.1048, GNorm = 0.2089, lr_0 = 3.7840e-04
Loss = 8.7730e-03, PNorm = 164.1274, GNorm = 0.2586, lr_0 = 3.7814e-04
Loss = 8.3606e-03, PNorm = 164.1471, GNorm = 0.4682, lr_0 = 3.7788e-04
Loss = 1.0130e-02, PNorm = 164.1717, GNorm = 0.4086, lr_0 = 3.7762e-04
Loss = 7.9588e-03, PNorm = 164.1957, GNorm = 0.2512, lr_0 = 3.7736e-04
Loss = 8.6074e-03, PNorm = 164.2178, GNorm = 0.3706, lr_0 = 3.7710e-04
Loss = 9.5781e-03, PNorm = 164.2422, GNorm = 0.0969, lr_0 = 3.7684e-04
Loss = 7.9087e-03, PNorm = 164.2651, GNorm = 0.1702, lr_0 = 3.7659e-04
Loss = 8.8507e-03, PNorm = 164.2854, GNorm = 0.6341, lr_0 = 3.7633e-04
Loss = 9.6239e-03, PNorm = 164.3020, GNorm = 0.3149, lr_0 = 3.7607e-04
Loss = 8.5984e-03, PNorm = 164.3185, GNorm = 0.1566, lr_0 = 3.7581e-04
Loss = 8.0719e-03, PNorm = 164.3360, GNorm = 0.1932, lr_0 = 3.7555e-04
Loss = 9.9726e-03, PNorm = 164.3575, GNorm = 0.1714, lr_0 = 3.7530e-04
Loss = 1.1572e-02, PNorm = 164.3787, GNorm = 0.2459, lr_0 = 3.7504e-04
Loss = 8.0688e-03, PNorm = 164.4030, GNorm = 0.2416, lr_0 = 3.7478e-04
Loss = 7.3218e-03, PNorm = 164.4238, GNorm = 0.2285, lr_0 = 3.7453e-04
Loss = 8.0678e-03, PNorm = 164.4457, GNorm = 0.2949, lr_0 = 3.7427e-04
Loss = 7.8805e-03, PNorm = 164.4604, GNorm = 0.1010, lr_0 = 3.7401e-04
Loss = 9.0700e-03, PNorm = 164.4799, GNorm = 0.0973, lr_0 = 3.7376e-04
Loss = 1.0315e-02, PNorm = 164.4964, GNorm = 0.4404, lr_0 = 3.7350e-04
Loss = 1.1019e-02, PNorm = 164.5183, GNorm = 0.2345, lr_0 = 3.7325e-04
Loss = 8.9735e-03, PNorm = 164.5417, GNorm = 0.2998, lr_0 = 3.7299e-04
Loss = 8.3576e-03, PNorm = 164.5669, GNorm = 0.5386, lr_0 = 3.7273e-04
Validation mae = 0.280060
Epoch 14
Loss = 9.3011e-03, PNorm = 164.5835, GNorm = 0.4079, lr_0 = 3.7248e-04
Loss = 7.7459e-03, PNorm = 164.5996, GNorm = 0.1891, lr_0 = 3.7222e-04
Loss = 7.2718e-03, PNorm = 164.6128, GNorm = 0.3541, lr_0 = 3.7197e-04
Loss = 7.1288e-03, PNorm = 164.6284, GNorm = 0.1527, lr_0 = 3.7171e-04
Loss = 7.6171e-03, PNorm = 164.6412, GNorm = 0.0974, lr_0 = 3.7146e-04
Loss = 7.6112e-03, PNorm = 164.6584, GNorm = 0.1776, lr_0 = 3.7120e-04
Loss = 6.5095e-03, PNorm = 164.6742, GNorm = 0.2497, lr_0 = 3.7095e-04
Loss = 7.3091e-03, PNorm = 164.6890, GNorm = 0.1652, lr_0 = 3.7070e-04
Loss = 5.8413e-03, PNorm = 164.6998, GNorm = 0.1795, lr_0 = 3.7044e-04
Loss = 6.2623e-03, PNorm = 164.7074, GNorm = 0.3321, lr_0 = 3.7019e-04
Loss = 7.3306e-03, PNorm = 164.7194, GNorm = 0.3801, lr_0 = 3.6993e-04
Loss = 8.6453e-03, PNorm = 164.7354, GNorm = 0.3594, lr_0 = 3.6968e-04
Loss = 9.5331e-03, PNorm = 164.7512, GNorm = 0.1687, lr_0 = 3.6943e-04
Loss = 6.7665e-03, PNorm = 164.7654, GNorm = 0.1649, lr_0 = 3.6917e-04
Loss = 8.4291e-03, PNorm = 164.7763, GNorm = 0.3134, lr_0 = 3.6892e-04
Loss = 7.1389e-03, PNorm = 164.7893, GNorm = 0.2904, lr_0 = 3.6867e-04
Loss = 1.0332e-02, PNorm = 164.7999, GNorm = 0.2960, lr_0 = 3.6842e-04
Loss = 7.1923e-03, PNorm = 164.8095, GNorm = 0.1579, lr_0 = 3.6816e-04
Loss = 6.6121e-03, PNorm = 164.8236, GNorm = 0.1782, lr_0 = 3.6791e-04
Loss = 7.3641e-03, PNorm = 164.8407, GNorm = 0.1610, lr_0 = 3.6766e-04
Loss = 7.9551e-03, PNorm = 164.8538, GNorm = 0.2215, lr_0 = 3.6741e-04
Loss = 8.5414e-03, PNorm = 164.8657, GNorm = 0.1994, lr_0 = 3.6716e-04
Loss = 7.1437e-03, PNorm = 164.8741, GNorm = 0.1966, lr_0 = 3.6690e-04
Loss = 7.2857e-03, PNorm = 164.8895, GNorm = 0.2802, lr_0 = 3.6665e-04
Loss = 7.0399e-03, PNorm = 164.9048, GNorm = 0.2417, lr_0 = 3.6640e-04
Loss = 7.1959e-03, PNorm = 164.9221, GNorm = 0.1057, lr_0 = 3.6615e-04
Loss = 7.8692e-03, PNorm = 164.9363, GNorm = 0.3451, lr_0 = 3.6590e-04
Loss = 8.8331e-03, PNorm = 164.9535, GNorm = 0.0808, lr_0 = 3.6565e-04
Loss = 5.9481e-03, PNorm = 164.9685, GNorm = 0.1911, lr_0 = 3.6540e-04
Loss = 7.1038e-03, PNorm = 164.9872, GNorm = 0.2785, lr_0 = 3.6515e-04
Loss = 6.4360e-03, PNorm = 165.0025, GNorm = 0.3259, lr_0 = 3.6490e-04
Loss = 5.2918e-03, PNorm = 165.0198, GNorm = 0.4760, lr_0 = 3.6465e-04
Loss = 6.2484e-03, PNorm = 165.0358, GNorm = 0.5995, lr_0 = 3.6440e-04
Loss = 1.0182e-02, PNorm = 165.0461, GNorm = 0.5294, lr_0 = 3.6415e-04
Loss = 7.0952e-03, PNorm = 165.0605, GNorm = 0.1886, lr_0 = 3.6390e-04
Loss = 6.1847e-03, PNorm = 165.0772, GNorm = 0.1549, lr_0 = 3.6365e-04
Loss = 9.3197e-03, PNorm = 165.0918, GNorm = 0.4828, lr_0 = 3.6340e-04
Loss = 8.8966e-03, PNorm = 165.1113, GNorm = 0.7362, lr_0 = 3.6315e-04
Loss = 8.5174e-03, PNorm = 165.1272, GNorm = 0.1432, lr_0 = 3.6290e-04
Loss = 7.0091e-03, PNorm = 165.1500, GNorm = 0.3870, lr_0 = 3.6266e-04
Loss = 7.9824e-03, PNorm = 165.1669, GNorm = 0.2044, lr_0 = 3.6241e-04
Loss = 8.5076e-03, PNorm = 165.1840, GNorm = 0.0868, lr_0 = 3.6216e-04
Loss = 6.3883e-03, PNorm = 165.2008, GNorm = 0.2249, lr_0 = 3.6191e-04
Loss = 6.8697e-03, PNorm = 165.2153, GNorm = 0.1403, lr_0 = 3.6166e-04
Loss = 7.3230e-03, PNorm = 165.2264, GNorm = 0.7595, lr_0 = 3.6141e-04
Loss = 7.2573e-03, PNorm = 165.2428, GNorm = 0.5229, lr_0 = 3.6117e-04
Loss = 6.2984e-03, PNorm = 165.2574, GNorm = 0.2449, lr_0 = 3.6092e-04
Loss = 7.5531e-03, PNorm = 165.2751, GNorm = 0.2057, lr_0 = 3.6067e-04
Loss = 6.4053e-03, PNorm = 165.2881, GNorm = 0.1427, lr_0 = 3.6043e-04
Loss = 8.1429e-03, PNorm = 165.3011, GNorm = 0.1738, lr_0 = 3.6018e-04
Loss = 7.0356e-03, PNorm = 165.3122, GNorm = 0.4701, lr_0 = 3.5993e-04
Loss = 9.2701e-03, PNorm = 165.3267, GNorm = 0.6839, lr_0 = 3.5969e-04
Loss = 5.9829e-03, PNorm = 165.3392, GNorm = 0.0632, lr_0 = 3.5944e-04
Loss = 9.8976e-03, PNorm = 165.3546, GNorm = 0.2497, lr_0 = 3.5919e-04
Loss = 7.2849e-03, PNorm = 165.3708, GNorm = 0.1193, lr_0 = 3.5895e-04
Loss = 6.5759e-03, PNorm = 165.3887, GNorm = 0.2140, lr_0 = 3.5870e-04
Loss = 7.0905e-03, PNorm = 165.4053, GNorm = 0.2141, lr_0 = 3.5845e-04
Loss = 7.2575e-03, PNorm = 165.4236, GNorm = 0.3218, lr_0 = 3.5821e-04
Loss = 7.1544e-03, PNorm = 165.4388, GNorm = 0.4923, lr_0 = 3.5796e-04
Loss = 8.3802e-03, PNorm = 165.4542, GNorm = 0.3228, lr_0 = 3.5772e-04
Loss = 6.1450e-03, PNorm = 165.4670, GNorm = 0.3543, lr_0 = 3.5747e-04
Loss = 6.1346e-03, PNorm = 165.4807, GNorm = 0.3501, lr_0 = 3.5723e-04
Loss = 5.6319e-03, PNorm = 165.4939, GNorm = 0.1889, lr_0 = 3.5698e-04
Loss = 5.7658e-03, PNorm = 165.5057, GNorm = 0.2009, lr_0 = 3.5674e-04
Loss = 7.9894e-03, PNorm = 165.5188, GNorm = 0.1651, lr_0 = 3.5650e-04
Loss = 8.3323e-03, PNorm = 165.5314, GNorm = 0.4195, lr_0 = 3.5625e-04
Loss = 7.4708e-03, PNorm = 165.5411, GNorm = 0.1991, lr_0 = 3.5601e-04
Loss = 7.8113e-03, PNorm = 165.5557, GNorm = 0.1923, lr_0 = 3.5576e-04
Loss = 6.9969e-03, PNorm = 165.5691, GNorm = 0.1618, lr_0 = 3.5552e-04
Loss = 5.8702e-03, PNorm = 165.5848, GNorm = 0.4010, lr_0 = 3.5528e-04
Loss = 7.7252e-03, PNorm = 165.6002, GNorm = 0.2572, lr_0 = 3.5503e-04
Loss = 6.7085e-03, PNorm = 165.6168, GNorm = 0.3766, lr_0 = 3.5479e-04
Loss = 7.4962e-03, PNorm = 165.6380, GNorm = 0.1174, lr_0 = 3.5455e-04
Loss = 9.0193e-03, PNorm = 165.6578, GNorm = 0.4909, lr_0 = 3.5430e-04
Loss = 7.8412e-03, PNorm = 165.6763, GNorm = 0.3274, lr_0 = 3.5406e-04
Loss = 6.3018e-03, PNorm = 165.6914, GNorm = 0.3551, lr_0 = 3.5382e-04
Loss = 8.9542e-03, PNorm = 165.7049, GNorm = 0.1486, lr_0 = 3.5358e-04
Loss = 7.8895e-03, PNorm = 165.7180, GNorm = 0.4239, lr_0 = 3.5333e-04
Loss = 1.0128e-02, PNorm = 165.7319, GNorm = 0.3556, lr_0 = 3.5309e-04
Loss = 6.6908e-03, PNorm = 165.7445, GNorm = 0.1381, lr_0 = 3.5285e-04
Loss = 7.1355e-03, PNorm = 165.7585, GNorm = 0.8005, lr_0 = 3.5261e-04
Loss = 6.7398e-03, PNorm = 165.7738, GNorm = 0.2303, lr_0 = 3.5237e-04
Loss = 7.8306e-03, PNorm = 165.7949, GNorm = 0.2029, lr_0 = 3.5212e-04
Loss = 7.0172e-03, PNorm = 165.8146, GNorm = 0.3725, lr_0 = 3.5188e-04
Loss = 7.4020e-03, PNorm = 165.8310, GNorm = 0.1524, lr_0 = 3.5164e-04
Loss = 6.0513e-03, PNorm = 165.8456, GNorm = 0.2370, lr_0 = 3.5140e-04
Loss = 7.4799e-03, PNorm = 165.8577, GNorm = 0.1146, lr_0 = 3.5116e-04
Loss = 6.9117e-03, PNorm = 165.8739, GNorm = 0.3511, lr_0 = 3.5092e-04
Loss = 7.8053e-03, PNorm = 165.8884, GNorm = 0.3477, lr_0 = 3.5068e-04
Loss = 7.4938e-03, PNorm = 165.9052, GNorm = 0.1198, lr_0 = 3.5044e-04
Loss = 6.8690e-03, PNorm = 165.9203, GNorm = 0.2840, lr_0 = 3.5020e-04
Loss = 9.8112e-03, PNorm = 165.9377, GNorm = 0.2200, lr_0 = 3.4996e-04
Loss = 6.6657e-03, PNorm = 165.9562, GNorm = 0.4156, lr_0 = 3.4972e-04
Loss = 6.5877e-03, PNorm = 165.9734, GNorm = 0.1461, lr_0 = 3.4948e-04
Loss = 7.3588e-03, PNorm = 165.9916, GNorm = 0.2062, lr_0 = 3.4924e-04
Loss = 7.7943e-03, PNorm = 166.0110, GNorm = 0.3396, lr_0 = 3.4900e-04
Loss = 6.9355e-03, PNorm = 166.0283, GNorm = 0.3630, lr_0 = 3.4876e-04
Loss = 6.3751e-03, PNorm = 166.0455, GNorm = 0.3131, lr_0 = 3.4852e-04
Loss = 7.6243e-03, PNorm = 166.0629, GNorm = 0.1139, lr_0 = 3.4828e-04
Loss = 8.5742e-03, PNorm = 166.0756, GNorm = 0.1826, lr_0 = 3.4805e-04
Loss = 7.1953e-03, PNorm = 166.0898, GNorm = 0.3048, lr_0 = 3.4781e-04
Loss = 7.1664e-03, PNorm = 166.1016, GNorm = 0.3739, lr_0 = 3.4757e-04
Loss = 8.3630e-03, PNorm = 166.1188, GNorm = 0.3072, lr_0 = 3.4733e-04
Loss = 6.3561e-03, PNorm = 166.1370, GNorm = 0.4062, lr_0 = 3.4709e-04
Loss = 6.0562e-03, PNorm = 166.1543, GNorm = 0.3898, lr_0 = 3.4686e-04
Loss = 6.8978e-03, PNorm = 166.1695, GNorm = 0.2476, lr_0 = 3.4662e-04
Loss = 6.8175e-03, PNorm = 166.1875, GNorm = 0.1370, lr_0 = 3.4638e-04
Loss = 6.3162e-03, PNorm = 166.2051, GNorm = 0.5412, lr_0 = 3.4614e-04
Loss = 6.1671e-03, PNorm = 166.2253, GNorm = 0.1920, lr_0 = 3.4591e-04
Loss = 6.3150e-03, PNorm = 166.2389, GNorm = 0.2472, lr_0 = 3.4567e-04
Loss = 6.8399e-03, PNorm = 166.2531, GNorm = 0.1752, lr_0 = 3.4543e-04
Loss = 7.1290e-03, PNorm = 166.2656, GNorm = 0.1381, lr_0 = 3.4520e-04
Loss = 8.1140e-03, PNorm = 166.2824, GNorm = 0.1978, lr_0 = 3.4496e-04
Loss = 8.4580e-03, PNorm = 166.3044, GNorm = 0.6440, lr_0 = 3.4472e-04
Loss = 8.2180e-03, PNorm = 166.3230, GNorm = 0.2134, lr_0 = 3.4449e-04
Loss = 6.3063e-03, PNorm = 166.3408, GNorm = 0.4240, lr_0 = 3.4425e-04
Loss = 6.1111e-03, PNorm = 166.3576, GNorm = 0.1281, lr_0 = 3.4402e-04
Loss = 6.1312e-03, PNorm = 166.3720, GNorm = 0.2008, lr_0 = 3.4378e-04
Loss = 8.6579e-03, PNorm = 166.3864, GNorm = 0.1105, lr_0 = 3.4354e-04
Loss = 6.7888e-03, PNorm = 166.4042, GNorm = 0.1311, lr_0 = 3.4331e-04
Validation mae = 0.279987
Epoch 15
Loss = 6.4174e-03, PNorm = 166.4155, GNorm = 0.2674, lr_0 = 3.4307e-04
Loss = 7.1864e-03, PNorm = 166.4307, GNorm = 0.1721, lr_0 = 3.4284e-04
Loss = 8.4918e-03, PNorm = 166.4393, GNorm = 0.1064, lr_0 = 3.4260e-04
Loss = 6.8984e-03, PNorm = 166.4539, GNorm = 0.4709, lr_0 = 3.4237e-04
Loss = 6.5327e-03, PNorm = 166.4658, GNorm = 0.2947, lr_0 = 3.4213e-04
Loss = 6.8054e-03, PNorm = 166.4793, GNorm = 0.1659, lr_0 = 3.4190e-04
Loss = 6.1395e-03, PNorm = 166.4917, GNorm = 0.1543, lr_0 = 3.4167e-04
Loss = 5.5811e-03, PNorm = 166.5030, GNorm = 0.0967, lr_0 = 3.4143e-04
Loss = 7.6105e-03, PNorm = 166.5114, GNorm = 0.3800, lr_0 = 3.4120e-04
Loss = 7.0087e-03, PNorm = 166.5224, GNorm = 0.4401, lr_0 = 3.4096e-04
Loss = 5.1006e-03, PNorm = 166.5364, GNorm = 0.1559, lr_0 = 3.4073e-04
Loss = 6.6653e-03, PNorm = 166.5451, GNorm = 0.2976, lr_0 = 3.4050e-04
Loss = 6.9726e-03, PNorm = 166.5549, GNorm = 0.2232, lr_0 = 3.4026e-04
Loss = 5.3676e-03, PNorm = 166.5642, GNorm = 0.1575, lr_0 = 3.4003e-04
Loss = 6.5970e-03, PNorm = 166.5743, GNorm = 0.3092, lr_0 = 3.3980e-04
Loss = 5.7986e-03, PNorm = 166.5859, GNorm = 0.3096, lr_0 = 3.3956e-04
Loss = 7.3326e-03, PNorm = 166.5993, GNorm = 0.4753, lr_0 = 3.3933e-04
Loss = 6.1685e-03, PNorm = 166.6118, GNorm = 0.1913, lr_0 = 3.3910e-04
Loss = 6.9659e-03, PNorm = 166.6239, GNorm = 0.6477, lr_0 = 3.3887e-04
Loss = 6.5172e-03, PNorm = 166.6369, GNorm = 0.3556, lr_0 = 3.3864e-04
Loss = 6.7188e-03, PNorm = 166.6467, GNorm = 0.4029, lr_0 = 3.3840e-04
Loss = 7.9411e-03, PNorm = 166.6554, GNorm = 0.4157, lr_0 = 3.3817e-04
Loss = 7.4699e-03, PNorm = 166.6662, GNorm = 0.0840, lr_0 = 3.3794e-04
Loss = 6.2450e-03, PNorm = 166.6793, GNorm = 0.2884, lr_0 = 3.3771e-04
Loss = 4.4560e-03, PNorm = 166.6940, GNorm = 0.1182, lr_0 = 3.3748e-04
Loss = 5.9977e-03, PNorm = 166.7066, GNorm = 0.2011, lr_0 = 3.3725e-04
Loss = 5.8256e-03, PNorm = 166.7199, GNorm = 0.2125, lr_0 = 3.3701e-04
Loss = 6.4439e-03, PNorm = 166.7306, GNorm = 0.1343, lr_0 = 3.3678e-04
Loss = 5.7927e-03, PNorm = 166.7448, GNorm = 0.0942, lr_0 = 3.3655e-04
Loss = 5.9744e-03, PNorm = 166.7551, GNorm = 0.2098, lr_0 = 3.3632e-04
Loss = 5.8895e-03, PNorm = 166.7661, GNorm = 0.3537, lr_0 = 3.3609e-04
Loss = 1.2406e-02, PNorm = 166.7762, GNorm = 0.1711, lr_0 = 3.3586e-04
Loss = 7.6899e-03, PNorm = 166.7889, GNorm = 0.3033, lr_0 = 3.3563e-04
Loss = 5.3535e-03, PNorm = 166.8001, GNorm = 0.1843, lr_0 = 3.3540e-04
Loss = 7.8583e-03, PNorm = 166.8126, GNorm = 0.5245, lr_0 = 3.3517e-04
Loss = 6.3907e-03, PNorm = 166.8242, GNorm = 0.0873, lr_0 = 3.3494e-04
Loss = 9.3994e-03, PNorm = 166.8351, GNorm = 0.4105, lr_0 = 3.3471e-04
Loss = 7.5358e-03, PNorm = 166.8485, GNorm = 0.2274, lr_0 = 3.3448e-04
Loss = 5.7709e-03, PNorm = 166.8648, GNorm = 0.2887, lr_0 = 3.3425e-04
Loss = 8.1178e-03, PNorm = 166.8793, GNorm = 0.3622, lr_0 = 3.3403e-04
Loss = 8.1384e-03, PNorm = 166.8969, GNorm = 0.5802, lr_0 = 3.3380e-04
Loss = 7.2987e-03, PNorm = 166.9098, GNorm = 0.4834, lr_0 = 3.3357e-04
Loss = 4.8230e-03, PNorm = 166.9225, GNorm = 0.2042, lr_0 = 3.3334e-04
Loss = 5.7841e-03, PNorm = 166.9360, GNorm = 0.1384, lr_0 = 3.3311e-04
Loss = 4.6251e-03, PNorm = 166.9475, GNorm = 0.1175, lr_0 = 3.3288e-04
Loss = 5.7719e-03, PNorm = 166.9593, GNorm = 0.1973, lr_0 = 3.3265e-04
Loss = 6.7817e-03, PNorm = 166.9741, GNorm = 0.2381, lr_0 = 3.3243e-04
Loss = 5.7416e-03, PNorm = 166.9872, GNorm = 0.5898, lr_0 = 3.3220e-04
Loss = 5.8004e-03, PNorm = 167.0005, GNorm = 0.1734, lr_0 = 3.3197e-04
Loss = 5.0364e-03, PNorm = 167.0128, GNorm = 0.1897, lr_0 = 3.3174e-04
Loss = 5.6916e-03, PNorm = 167.0226, GNorm = 0.3466, lr_0 = 3.3152e-04
Loss = 5.5842e-03, PNorm = 167.0335, GNorm = 0.1839, lr_0 = 3.3129e-04
Loss = 5.9303e-03, PNorm = 167.0472, GNorm = 0.2897, lr_0 = 3.3106e-04
Loss = 5.6020e-03, PNorm = 167.0591, GNorm = 0.5098, lr_0 = 3.3084e-04
Loss = 5.5439e-03, PNorm = 167.0729, GNorm = 0.2572, lr_0 = 3.3061e-04
Loss = 7.1665e-03, PNorm = 167.0866, GNorm = 0.1312, lr_0 = 3.3038e-04
Loss = 6.7741e-03, PNorm = 167.1004, GNorm = 0.8942, lr_0 = 3.3016e-04
Loss = 5.3757e-03, PNorm = 167.1105, GNorm = 0.1169, lr_0 = 3.2993e-04
Loss = 4.7363e-03, PNorm = 167.1224, GNorm = 0.1248, lr_0 = 3.2970e-04
Loss = 5.9859e-03, PNorm = 167.1327, GNorm = 0.2641, lr_0 = 3.2948e-04
Loss = 6.7530e-03, PNorm = 167.1462, GNorm = 0.1521, lr_0 = 3.2925e-04
Loss = 5.5998e-03, PNorm = 167.1603, GNorm = 0.1393, lr_0 = 3.2903e-04
Loss = 8.1477e-03, PNorm = 167.1755, GNorm = 0.2175, lr_0 = 3.2880e-04
Loss = 6.3902e-03, PNorm = 167.1932, GNorm = 0.3727, lr_0 = 3.2858e-04
Loss = 5.9221e-03, PNorm = 167.2039, GNorm = 0.0767, lr_0 = 3.2835e-04
Loss = 7.9151e-03, PNorm = 167.2157, GNorm = 0.1881, lr_0 = 3.2813e-04
Loss = 7.8528e-03, PNorm = 167.2282, GNorm = 0.4012, lr_0 = 3.2790e-04
Loss = 5.8516e-03, PNorm = 167.2392, GNorm = 0.0892, lr_0 = 3.2768e-04
Loss = 7.0169e-03, PNorm = 167.2509, GNorm = 0.0877, lr_0 = 3.2745e-04
Loss = 4.8096e-03, PNorm = 167.2618, GNorm = 0.2469, lr_0 = 3.2723e-04
Loss = 5.3651e-03, PNorm = 167.2742, GNorm = 0.0907, lr_0 = 3.2700e-04
Loss = 6.4106e-03, PNorm = 167.2897, GNorm = 0.5196, lr_0 = 3.2678e-04
Loss = 5.6898e-03, PNorm = 167.3031, GNorm = 0.1230, lr_0 = 3.2656e-04
Loss = 5.0496e-03, PNorm = 167.3149, GNorm = 0.1296, lr_0 = 3.2633e-04
Loss = 5.6013e-03, PNorm = 167.3246, GNorm = 0.3305, lr_0 = 3.2611e-04
Loss = 6.6654e-03, PNorm = 167.3349, GNorm = 0.1664, lr_0 = 3.2589e-04
Loss = 5.3633e-03, PNorm = 167.3481, GNorm = 0.1982, lr_0 = 3.2566e-04
Loss = 5.4173e-03, PNorm = 167.3631, GNorm = 0.2896, lr_0 = 3.2544e-04
Loss = 5.9697e-03, PNorm = 167.3718, GNorm = 0.6066, lr_0 = 3.2522e-04
Loss = 5.2346e-03, PNorm = 167.3843, GNorm = 0.2085, lr_0 = 3.2499e-04
Loss = 5.6355e-03, PNorm = 167.3986, GNorm = 0.1229, lr_0 = 3.2477e-04
Loss = 5.3373e-03, PNorm = 167.4119, GNorm = 0.1457, lr_0 = 3.2455e-04
Loss = 5.9926e-03, PNorm = 167.4262, GNorm = 0.1570, lr_0 = 3.2433e-04
Loss = 6.4805e-03, PNorm = 167.4387, GNorm = 0.4574, lr_0 = 3.2410e-04
Loss = 7.0835e-03, PNorm = 167.4519, GNorm = 0.4035, lr_0 = 3.2388e-04
Loss = 5.2194e-03, PNorm = 167.4646, GNorm = 0.1937, lr_0 = 3.2366e-04
Loss = 5.2936e-03, PNorm = 167.4755, GNorm = 0.1183, lr_0 = 3.2344e-04
Loss = 7.1028e-03, PNorm = 167.4877, GNorm = 0.1876, lr_0 = 3.2322e-04
Loss = 5.5693e-03, PNorm = 167.4983, GNorm = 0.1679, lr_0 = 3.2300e-04
Loss = 5.5358e-03, PNorm = 167.5089, GNorm = 0.1689, lr_0 = 3.2277e-04
Loss = 9.2982e-03, PNorm = 167.5215, GNorm = 0.5149, lr_0 = 3.2255e-04
Loss = 6.0178e-03, PNorm = 167.5300, GNorm = 0.1397, lr_0 = 3.2233e-04
Loss = 5.2572e-03, PNorm = 167.5407, GNorm = 0.2832, lr_0 = 3.2211e-04
Loss = 6.7525e-03, PNorm = 167.5528, GNorm = 0.2188, lr_0 = 3.2189e-04
Loss = 6.8562e-03, PNorm = 167.5675, GNorm = 0.1615, lr_0 = 3.2167e-04
Loss = 6.8671e-03, PNorm = 167.5803, GNorm = 0.4945, lr_0 = 3.2145e-04
Loss = 6.1870e-03, PNorm = 167.5951, GNorm = 0.3073, lr_0 = 3.2123e-04
Loss = 4.8706e-03, PNorm = 167.6098, GNorm = 0.1091, lr_0 = 3.2101e-04
Loss = 6.7560e-03, PNorm = 167.6213, GNorm = 0.1738, lr_0 = 3.2079e-04
Loss = 5.2811e-03, PNorm = 167.6372, GNorm = 0.2751, lr_0 = 3.2057e-04
Loss = 4.7465e-03, PNorm = 167.6506, GNorm = 0.1916, lr_0 = 3.2035e-04
Loss = 4.6906e-03, PNorm = 167.6646, GNorm = 0.3570, lr_0 = 3.2013e-04
Loss = 4.4024e-03, PNorm = 167.6778, GNorm = 0.2813, lr_0 = 3.1991e-04
Loss = 8.2675e-03, PNorm = 167.6899, GNorm = 0.2695, lr_0 = 3.1969e-04
Loss = 5.9843e-03, PNorm = 167.7024, GNorm = 0.2868, lr_0 = 3.1947e-04
Loss = 5.5422e-03, PNorm = 167.7170, GNorm = 0.2701, lr_0 = 3.1925e-04
Loss = 5.6662e-03, PNorm = 167.7284, GNorm = 0.1227, lr_0 = 3.1904e-04
Loss = 6.3244e-03, PNorm = 167.7414, GNorm = 0.1149, lr_0 = 3.1882e-04
Loss = 5.5698e-03, PNorm = 167.7591, GNorm = 0.1685, lr_0 = 3.1860e-04
Loss = 5.1470e-03, PNorm = 167.7734, GNorm = 0.1329, lr_0 = 3.1838e-04
Loss = 5.3179e-03, PNorm = 167.7848, GNorm = 0.2350, lr_0 = 3.1816e-04
Loss = 5.3142e-03, PNorm = 167.7969, GNorm = 0.2017, lr_0 = 3.1794e-04
Loss = 8.0914e-03, PNorm = 167.8064, GNorm = 0.1081, lr_0 = 3.1773e-04
Loss = 5.0073e-03, PNorm = 167.8186, GNorm = 0.3363, lr_0 = 3.1751e-04
Loss = 6.1253e-03, PNorm = 167.8344, GNorm = 0.3099, lr_0 = 3.1729e-04
Loss = 6.4914e-03, PNorm = 167.8497, GNorm = 0.3450, lr_0 = 3.1707e-04
Loss = 7.1235e-03, PNorm = 167.8670, GNorm = 0.3387, lr_0 = 3.1686e-04
Loss = 5.6209e-03, PNorm = 167.8809, GNorm = 0.3774, lr_0 = 3.1664e-04
Loss = 7.5671e-03, PNorm = 167.8988, GNorm = 0.2526, lr_0 = 3.1642e-04
Loss = 4.9695e-03, PNorm = 167.9116, GNorm = 0.1567, lr_0 = 3.1621e-04
Validation mae = 0.279909
Epoch 16
Loss = 5.4985e-03, PNorm = 167.9243, GNorm = 0.1173, lr_0 = 3.1599e-04
Loss = 7.4348e-03, PNorm = 167.9340, GNorm = 0.2584, lr_0 = 3.1577e-04
Loss = 6.2581e-03, PNorm = 167.9449, GNorm = 0.1749, lr_0 = 3.1556e-04
Loss = 7.2046e-03, PNorm = 167.9514, GNorm = 0.2284, lr_0 = 3.1534e-04
Loss = 7.7258e-03, PNorm = 167.9609, GNorm = 0.1081, lr_0 = 3.1512e-04
Loss = 5.3588e-03, PNorm = 167.9736, GNorm = 0.5457, lr_0 = 3.1491e-04
Loss = 5.0494e-03, PNorm = 167.9828, GNorm = 0.4743, lr_0 = 3.1469e-04
Loss = 6.8877e-03, PNorm = 167.9937, GNorm = 0.1566, lr_0 = 3.1448e-04
Loss = 5.7737e-03, PNorm = 168.0027, GNorm = 0.4113, lr_0 = 3.1426e-04
Loss = 5.9262e-03, PNorm = 168.0150, GNorm = 0.1685, lr_0 = 3.1405e-04
Loss = 5.1771e-03, PNorm = 168.0242, GNorm = 0.2125, lr_0 = 3.1383e-04
Loss = 5.2505e-03, PNorm = 168.0329, GNorm = 0.2732, lr_0 = 3.1362e-04
Loss = 5.2010e-03, PNorm = 168.0424, GNorm = 0.1072, lr_0 = 3.1340e-04
Loss = 4.8785e-03, PNorm = 168.0552, GNorm = 0.4294, lr_0 = 3.1319e-04
Loss = 4.2166e-03, PNorm = 168.0654, GNorm = 0.2211, lr_0 = 3.1297e-04
Loss = 7.5504e-03, PNorm = 168.0729, GNorm = 0.1213, lr_0 = 3.1276e-04
Loss = 5.8488e-03, PNorm = 168.0802, GNorm = 0.1738, lr_0 = 3.1254e-04
Loss = 5.1514e-03, PNorm = 168.0890, GNorm = 0.4684, lr_0 = 3.1233e-04
Loss = 5.1150e-03, PNorm = 168.0972, GNorm = 0.1653, lr_0 = 3.1212e-04
Loss = 7.4416e-03, PNorm = 168.1038, GNorm = 0.0847, lr_0 = 3.1190e-04
Loss = 4.2404e-03, PNorm = 168.1152, GNorm = 0.2631, lr_0 = 3.1169e-04
Loss = 5.0139e-03, PNorm = 168.1236, GNorm = 0.1189, lr_0 = 3.1147e-04
Loss = 4.3355e-03, PNorm = 168.1341, GNorm = 0.2481, lr_0 = 3.1126e-04
Loss = 4.9966e-03, PNorm = 168.1440, GNorm = 0.1603, lr_0 = 3.1105e-04
Loss = 4.8200e-03, PNorm = 168.1544, GNorm = 0.3521, lr_0 = 3.1083e-04
Loss = 4.4869e-03, PNorm = 168.1649, GNorm = 0.1544, lr_0 = 3.1062e-04
Loss = 5.2046e-03, PNorm = 168.1765, GNorm = 0.2711, lr_0 = 3.1041e-04
Loss = 4.6350e-03, PNorm = 168.1872, GNorm = 0.2736, lr_0 = 3.1020e-04
Loss = 5.0177e-03, PNorm = 168.1972, GNorm = 0.0953, lr_0 = 3.0998e-04
Loss = 5.7027e-03, PNorm = 168.2029, GNorm = 0.4650, lr_0 = 3.0977e-04
Loss = 5.9942e-03, PNorm = 168.2125, GNorm = 0.2792, lr_0 = 3.0956e-04
Loss = 8.3769e-03, PNorm = 168.2252, GNorm = 0.4777, lr_0 = 3.0935e-04
Loss = 5.1799e-03, PNorm = 168.2352, GNorm = 0.0840, lr_0 = 3.0914e-04
Loss = 6.0044e-03, PNorm = 168.2444, GNorm = 0.1392, lr_0 = 3.0892e-04
Loss = 4.7216e-03, PNorm = 168.2541, GNorm = 0.2531, lr_0 = 3.0871e-04
Loss = 6.2402e-03, PNorm = 168.2679, GNorm = 0.1848, lr_0 = 3.0850e-04
Loss = 4.9386e-03, PNorm = 168.2777, GNorm = 0.1528, lr_0 = 3.0829e-04
Loss = 5.8368e-03, PNorm = 168.2882, GNorm = 0.4141, lr_0 = 3.0808e-04
Loss = 4.7827e-03, PNorm = 168.2974, GNorm = 0.4588, lr_0 = 3.0787e-04
Loss = 5.5908e-03, PNorm = 168.3111, GNorm = 0.2679, lr_0 = 3.0766e-04
Loss = 4.1715e-03, PNorm = 168.3253, GNorm = 0.1769, lr_0 = 3.0745e-04
Loss = 4.2473e-03, PNorm = 168.3364, GNorm = 0.1201, lr_0 = 3.0723e-04
Loss = 4.6530e-03, PNorm = 168.3466, GNorm = 0.2608, lr_0 = 3.0702e-04
Loss = 4.2441e-03, PNorm = 168.3563, GNorm = 0.1925, lr_0 = 3.0681e-04
Loss = 5.1400e-03, PNorm = 168.3671, GNorm = 0.1850, lr_0 = 3.0660e-04
Loss = 4.3806e-03, PNorm = 168.3758, GNorm = 0.0772, lr_0 = 3.0639e-04
Loss = 5.4095e-03, PNorm = 168.3876, GNorm = 0.1175, lr_0 = 3.0618e-04
Loss = 5.9853e-03, PNorm = 168.3981, GNorm = 0.2274, lr_0 = 3.0597e-04
Loss = 5.0432e-03, PNorm = 168.4096, GNorm = 0.2878, lr_0 = 3.0576e-04
Loss = 6.2354e-03, PNorm = 168.4205, GNorm = 0.1668, lr_0 = 3.0555e-04
Loss = 6.1420e-03, PNorm = 168.4304, GNorm = 0.3056, lr_0 = 3.0535e-04
Loss = 5.6051e-03, PNorm = 168.4394, GNorm = 0.1159, lr_0 = 3.0514e-04
Loss = 4.6368e-03, PNorm = 168.4527, GNorm = 0.0789, lr_0 = 3.0493e-04
Loss = 5.9924e-03, PNorm = 168.4654, GNorm = 0.0870, lr_0 = 3.0472e-04
Loss = 4.3230e-03, PNorm = 168.4740, GNorm = 0.2518, lr_0 = 3.0451e-04
Loss = 4.4182e-03, PNorm = 168.4818, GNorm = 0.1518, lr_0 = 3.0430e-04
Loss = 4.4718e-03, PNorm = 168.4906, GNorm = 0.1867, lr_0 = 3.0409e-04
Loss = 4.4449e-03, PNorm = 168.4991, GNorm = 0.1408, lr_0 = 3.0388e-04
Loss = 6.8459e-03, PNorm = 168.5092, GNorm = 0.3632, lr_0 = 3.0368e-04
Loss = 5.0020e-03, PNorm = 168.5201, GNorm = 0.4798, lr_0 = 3.0347e-04
Loss = 4.7978e-03, PNorm = 168.5322, GNorm = 0.1461, lr_0 = 3.0326e-04
Loss = 5.6451e-03, PNorm = 168.5439, GNorm = 0.2567, lr_0 = 3.0305e-04
Loss = 4.6388e-03, PNorm = 168.5589, GNorm = 0.3532, lr_0 = 3.0284e-04
Loss = 4.0703e-03, PNorm = 168.5698, GNorm = 0.1001, lr_0 = 3.0264e-04
Loss = 5.6835e-03, PNorm = 168.5790, GNorm = 0.2443, lr_0 = 3.0243e-04
Loss = 4.4607e-03, PNorm = 168.5863, GNorm = 0.1008, lr_0 = 3.0222e-04
Loss = 4.5866e-03, PNorm = 168.5954, GNorm = 0.3493, lr_0 = 3.0202e-04
Loss = 4.4709e-03, PNorm = 168.6072, GNorm = 0.2358, lr_0 = 3.0181e-04
Loss = 4.8824e-03, PNorm = 168.6167, GNorm = 0.3881, lr_0 = 3.0160e-04
Loss = 5.8679e-03, PNorm = 168.6258, GNorm = 0.0856, lr_0 = 3.0140e-04
Loss = 5.4649e-03, PNorm = 168.6331, GNorm = 0.2608, lr_0 = 3.0119e-04
Loss = 4.5591e-03, PNorm = 168.6425, GNorm = 0.4273, lr_0 = 3.0098e-04
Loss = 8.7755e-03, PNorm = 168.6508, GNorm = 0.3578, lr_0 = 3.0078e-04
Loss = 6.1359e-03, PNorm = 168.6606, GNorm = 0.1731, lr_0 = 3.0057e-04
Loss = 5.1915e-03, PNorm = 168.6738, GNorm = 0.3409, lr_0 = 3.0036e-04
Loss = 3.9968e-03, PNorm = 168.6855, GNorm = 0.1866, lr_0 = 3.0016e-04
Loss = 4.5838e-03, PNorm = 168.6980, GNorm = 0.1317, lr_0 = 2.9995e-04
Loss = 5.3350e-03, PNorm = 168.7087, GNorm = 0.1281, lr_0 = 2.9975e-04
Loss = 5.9858e-03, PNorm = 168.7208, GNorm = 0.2079, lr_0 = 2.9954e-04
Loss = 5.2373e-03, PNorm = 168.7354, GNorm = 0.3739, lr_0 = 2.9934e-04
Loss = 4.9297e-03, PNorm = 168.7500, GNorm = 0.2720, lr_0 = 2.9913e-04
Loss = 7.3748e-03, PNorm = 168.7586, GNorm = 0.3404, lr_0 = 2.9893e-04
Loss = 5.5402e-03, PNorm = 168.7696, GNorm = 0.3837, lr_0 = 2.9872e-04
Loss = 4.5976e-03, PNorm = 168.7796, GNorm = 0.2049, lr_0 = 2.9852e-04
Loss = 5.7918e-03, PNorm = 168.7943, GNorm = 0.1834, lr_0 = 2.9831e-04
Loss = 5.4595e-03, PNorm = 168.8061, GNorm = 0.1444, lr_0 = 2.9811e-04
Loss = 6.2608e-03, PNorm = 168.8173, GNorm = 0.4192, lr_0 = 2.9790e-04
Loss = 5.1227e-03, PNorm = 168.8287, GNorm = 0.1824, lr_0 = 2.9770e-04
Loss = 4.9351e-03, PNorm = 168.8434, GNorm = 0.1038, lr_0 = 2.9750e-04
Loss = 4.6845e-03, PNorm = 168.8591, GNorm = 0.1518, lr_0 = 2.9729e-04
Loss = 5.2144e-03, PNorm = 168.8713, GNorm = 0.1479, lr_0 = 2.9709e-04
Loss = 4.9034e-03, PNorm = 168.8824, GNorm = 0.3850, lr_0 = 2.9689e-04
Loss = 4.7442e-03, PNorm = 168.8928, GNorm = 0.2946, lr_0 = 2.9668e-04
Loss = 5.4890e-03, PNorm = 168.9061, GNorm = 0.1404, lr_0 = 2.9648e-04
Loss = 5.7647e-03, PNorm = 168.9187, GNorm = 0.1120, lr_0 = 2.9628e-04
Loss = 4.5724e-03, PNorm = 168.9302, GNorm = 0.2855, lr_0 = 2.9607e-04
Loss = 4.7529e-03, PNorm = 168.9426, GNorm = 0.3243, lr_0 = 2.9587e-04
Loss = 4.6884e-03, PNorm = 168.9546, GNorm = 0.3683, lr_0 = 2.9567e-04
Loss = 4.9230e-03, PNorm = 168.9685, GNorm = 0.3322, lr_0 = 2.9546e-04
Loss = 5.0866e-03, PNorm = 168.9796, GNorm = 0.2504, lr_0 = 2.9526e-04
Loss = 5.1118e-03, PNorm = 168.9887, GNorm = 0.3331, lr_0 = 2.9506e-04
Loss = 4.7988e-03, PNorm = 168.9973, GNorm = 0.4063, lr_0 = 2.9486e-04
Loss = 8.1521e-03, PNorm = 169.0089, GNorm = 0.4027, lr_0 = 2.9466e-04
Loss = 5.9591e-03, PNorm = 169.0206, GNorm = 0.2847, lr_0 = 2.9445e-04
Loss = 5.8736e-03, PNorm = 169.0298, GNorm = 0.3753, lr_0 = 2.9425e-04
Loss = 4.9494e-03, PNorm = 169.0434, GNorm = 0.2056, lr_0 = 2.9405e-04
Loss = 4.8783e-03, PNorm = 169.0549, GNorm = 0.1650, lr_0 = 2.9385e-04
Loss = 6.0585e-03, PNorm = 169.0690, GNorm = 0.2384, lr_0 = 2.9365e-04
Loss = 4.3450e-03, PNorm = 169.0803, GNorm = 0.0828, lr_0 = 2.9345e-04
Loss = 5.2828e-03, PNorm = 169.0905, GNorm = 0.0859, lr_0 = 2.9325e-04
Loss = 5.5544e-03, PNorm = 169.1014, GNorm = 0.3464, lr_0 = 2.9305e-04
Loss = 4.6748e-03, PNorm = 169.1120, GNorm = 0.0852, lr_0 = 2.9284e-04
Loss = 4.5987e-03, PNorm = 169.1242, GNorm = 0.1560, lr_0 = 2.9264e-04
Loss = 5.9083e-03, PNorm = 169.1348, GNorm = 0.1101, lr_0 = 2.9244e-04
Loss = 5.2411e-03, PNorm = 169.1468, GNorm = 0.3953, lr_0 = 2.9224e-04
Loss = 5.3058e-03, PNorm = 169.1607, GNorm = 0.2319, lr_0 = 2.9204e-04
Loss = 5.9463e-03, PNorm = 169.1712, GNorm = 0.2268, lr_0 = 2.9184e-04
Loss = 5.3133e-03, PNorm = 169.1802, GNorm = 0.3831, lr_0 = 2.9164e-04
Loss = 5.5201e-03, PNorm = 169.1900, GNorm = 0.1904, lr_0 = 2.9144e-04
Loss = 4.2686e-03, PNorm = 169.2013, GNorm = 0.1370, lr_0 = 2.9124e-04
Validation mae = 0.279959
Epoch 17
Loss = 4.8133e-03, PNorm = 169.2132, GNorm = 0.6386, lr_0 = 2.9104e-04
Loss = 3.8517e-03, PNorm = 169.2223, GNorm = 0.1831, lr_0 = 2.9084e-04
Loss = 4.3989e-03, PNorm = 169.2253, GNorm = 0.1465, lr_0 = 2.9065e-04
Loss = 4.8737e-03, PNorm = 169.2320, GNorm = 0.2954, lr_0 = 2.9045e-04
Loss = 6.2448e-03, PNorm = 169.2404, GNorm = 0.0879, lr_0 = 2.9025e-04
Loss = 4.2002e-03, PNorm = 169.2504, GNorm = 0.0908, lr_0 = 2.9005e-04
Loss = 4.1546e-03, PNorm = 169.2587, GNorm = 0.1018, lr_0 = 2.8985e-04
Loss = 4.5604e-03, PNorm = 169.2671, GNorm = 0.3219, lr_0 = 2.8965e-04
Loss = 4.8973e-03, PNorm = 169.2759, GNorm = 0.2744, lr_0 = 2.8945e-04
Loss = 5.1586e-03, PNorm = 169.2841, GNorm = 0.3110, lr_0 = 2.8925e-04
Loss = 4.2626e-03, PNorm = 169.2955, GNorm = 0.1724, lr_0 = 2.8906e-04
Loss = 5.6811e-03, PNorm = 169.3067, GNorm = 0.2916, lr_0 = 2.8886e-04
Loss = 4.7946e-03, PNorm = 169.3158, GNorm = 0.0961, lr_0 = 2.8866e-04
Loss = 3.3870e-03, PNorm = 169.3246, GNorm = 0.3184, lr_0 = 2.8846e-04
Loss = 3.9148e-03, PNorm = 169.3319, GNorm = 0.1724, lr_0 = 2.8826e-04
Loss = 4.6449e-03, PNorm = 169.3430, GNorm = 0.3846, lr_0 = 2.8807e-04
Loss = 4.4170e-03, PNorm = 169.3513, GNorm = 0.2462, lr_0 = 2.8787e-04
Loss = 5.3359e-03, PNorm = 169.3571, GNorm = 0.4304, lr_0 = 2.8767e-04
Loss = 4.4000e-03, PNorm = 169.3634, GNorm = 0.3506, lr_0 = 2.8748e-04
Loss = 5.1072e-03, PNorm = 169.3712, GNorm = 0.4605, lr_0 = 2.8728e-04
Loss = 4.0975e-03, PNorm = 169.3796, GNorm = 0.1528, lr_0 = 2.8708e-04
Loss = 4.2385e-03, PNorm = 169.3879, GNorm = 0.0539, lr_0 = 2.8689e-04
Loss = 4.6112e-03, PNorm = 169.3975, GNorm = 0.1541, lr_0 = 2.8669e-04
Loss = 6.6842e-03, PNorm = 169.4070, GNorm = 0.2111, lr_0 = 2.8649e-04
Loss = 8.0657e-03, PNorm = 169.4150, GNorm = 0.0915, lr_0 = 2.8630e-04
Loss = 3.9516e-03, PNorm = 169.4218, GNorm = 0.3115, lr_0 = 2.8610e-04
Loss = 4.0238e-03, PNorm = 169.4307, GNorm = 0.1783, lr_0 = 2.8590e-04
Loss = 3.6886e-03, PNorm = 169.4360, GNorm = 0.4844, lr_0 = 2.8571e-04
Loss = 4.8576e-03, PNorm = 169.4394, GNorm = 0.2932, lr_0 = 2.8551e-04
Loss = 7.2928e-03, PNorm = 169.4453, GNorm = 0.1352, lr_0 = 2.8532e-04
Loss = 4.2625e-03, PNorm = 169.4540, GNorm = 0.0706, lr_0 = 2.8512e-04
Loss = 4.4460e-03, PNorm = 169.4651, GNorm = 0.1412, lr_0 = 2.8493e-04
Loss = 4.7292e-03, PNorm = 169.4766, GNorm = 0.1812, lr_0 = 2.8473e-04
Loss = 5.5614e-03, PNorm = 169.4849, GNorm = 0.1974, lr_0 = 2.8454e-04
Loss = 4.1303e-03, PNorm = 169.4940, GNorm = 0.1807, lr_0 = 2.8434e-04
Loss = 4.3134e-03, PNorm = 169.5013, GNorm = 0.1940, lr_0 = 2.8415e-04
Loss = 4.1438e-03, PNorm = 169.5095, GNorm = 0.1552, lr_0 = 2.8395e-04
Loss = 3.8436e-03, PNorm = 169.5173, GNorm = 0.1874, lr_0 = 2.8376e-04
Loss = 3.7444e-03, PNorm = 169.5260, GNorm = 0.1770, lr_0 = 2.8356e-04
Loss = 4.9127e-03, PNorm = 169.5349, GNorm = 0.1906, lr_0 = 2.8337e-04
Loss = 5.9825e-03, PNorm = 169.5472, GNorm = 0.3398, lr_0 = 2.8317e-04
Loss = 5.1381e-03, PNorm = 169.5570, GNorm = 0.2022, lr_0 = 2.8298e-04
Loss = 3.7137e-03, PNorm = 169.5679, GNorm = 0.2704, lr_0 = 2.8279e-04
Loss = 4.2088e-03, PNorm = 169.5770, GNorm = 0.2124, lr_0 = 2.8259e-04
Loss = 4.1205e-03, PNorm = 169.5861, GNorm = 0.0956, lr_0 = 2.8240e-04
Loss = 3.8691e-03, PNorm = 169.5962, GNorm = 0.3556, lr_0 = 2.8221e-04
Loss = 5.4493e-03, PNorm = 169.6038, GNorm = 0.2585, lr_0 = 2.8201e-04
Loss = 5.2030e-03, PNorm = 169.6094, GNorm = 0.2169, lr_0 = 2.8182e-04
Loss = 5.4854e-03, PNorm = 169.6166, GNorm = 0.1478, lr_0 = 2.8163e-04
Loss = 4.8214e-03, PNorm = 169.6230, GNorm = 0.2783, lr_0 = 2.8143e-04
Loss = 4.1430e-03, PNorm = 169.6337, GNorm = 0.3166, lr_0 = 2.8124e-04
Loss = 7.2195e-03, PNorm = 169.6419, GNorm = 0.3376, lr_0 = 2.8105e-04
Loss = 7.1525e-03, PNorm = 169.6529, GNorm = 0.2412, lr_0 = 2.8085e-04
Loss = 5.9821e-03, PNorm = 169.6640, GNorm = 0.1394, lr_0 = 2.8066e-04
Loss = 5.0875e-03, PNorm = 169.6751, GNorm = 0.1122, lr_0 = 2.8047e-04
Loss = 3.5565e-03, PNorm = 169.6862, GNorm = 0.2803, lr_0 = 2.8028e-04
Loss = 4.0899e-03, PNorm = 169.6933, GNorm = 0.0680, lr_0 = 2.8009e-04
Loss = 4.0034e-03, PNorm = 169.7022, GNorm = 0.1177, lr_0 = 2.7989e-04
Loss = 5.3720e-03, PNorm = 169.7117, GNorm = 0.2925, lr_0 = 2.7970e-04
Loss = 3.9559e-03, PNorm = 169.7189, GNorm = 0.1951, lr_0 = 2.7951e-04
Loss = 6.0242e-03, PNorm = 169.7286, GNorm = 0.5572, lr_0 = 2.7932e-04
Loss = 3.8223e-03, PNorm = 169.7370, GNorm = 0.2186, lr_0 = 2.7913e-04
Loss = 4.2861e-03, PNorm = 169.7467, GNorm = 0.2733, lr_0 = 2.7894e-04
Loss = 6.2003e-03, PNorm = 169.7570, GNorm = 0.0895, lr_0 = 2.7875e-04
Loss = 5.1987e-03, PNorm = 169.7666, GNorm = 0.1484, lr_0 = 2.7855e-04
Loss = 5.5515e-03, PNorm = 169.7733, GNorm = 0.2360, lr_0 = 2.7836e-04
Loss = 4.4196e-03, PNorm = 169.7814, GNorm = 0.0848, lr_0 = 2.7817e-04
Loss = 4.5914e-03, PNorm = 169.7905, GNorm = 0.2434, lr_0 = 2.7798e-04
Loss = 4.2839e-03, PNorm = 169.7997, GNorm = 0.2667, lr_0 = 2.7779e-04
Loss = 3.2373e-03, PNorm = 169.8068, GNorm = 0.2900, lr_0 = 2.7760e-04
Loss = 3.8900e-03, PNorm = 169.8162, GNorm = 0.2915, lr_0 = 2.7741e-04
Loss = 4.5140e-03, PNorm = 169.8261, GNorm = 0.1967, lr_0 = 2.7722e-04
Loss = 5.6213e-03, PNorm = 169.8356, GNorm = 0.1089, lr_0 = 2.7703e-04
Loss = 5.8508e-03, PNorm = 169.8419, GNorm = 0.2060, lr_0 = 2.7684e-04
Loss = 4.1080e-03, PNorm = 169.8517, GNorm = 0.3175, lr_0 = 2.7665e-04
Loss = 4.8769e-03, PNorm = 169.8575, GNorm = 0.1523, lr_0 = 2.7646e-04
Loss = 5.0750e-03, PNorm = 169.8670, GNorm = 0.0953, lr_0 = 2.7627e-04
Loss = 5.5662e-03, PNorm = 169.8759, GNorm = 0.4013, lr_0 = 2.7608e-04
Loss = 3.7778e-03, PNorm = 169.8860, GNorm = 0.1815, lr_0 = 2.7590e-04
Loss = 4.8697e-03, PNorm = 169.8959, GNorm = 0.2698, lr_0 = 2.7571e-04
Loss = 3.8194e-03, PNorm = 169.9068, GNorm = 0.1521, lr_0 = 2.7552e-04
Loss = 4.8756e-03, PNorm = 169.9166, GNorm = 0.2005, lr_0 = 2.7533e-04
Loss = 4.0614e-03, PNorm = 169.9253, GNorm = 0.1472, lr_0 = 2.7514e-04
Loss = 3.7208e-03, PNorm = 169.9335, GNorm = 0.0820, lr_0 = 2.7495e-04
Loss = 6.5263e-03, PNorm = 169.9409, GNorm = 0.5145, lr_0 = 2.7476e-04
Loss = 4.2271e-03, PNorm = 169.9469, GNorm = 0.0906, lr_0 = 2.7457e-04
Loss = 4.6930e-03, PNorm = 169.9578, GNorm = 0.1957, lr_0 = 2.7439e-04
Loss = 3.4064e-03, PNorm = 169.9692, GNorm = 0.2744, lr_0 = 2.7420e-04
Loss = 5.6235e-03, PNorm = 169.9770, GNorm = 0.2065, lr_0 = 2.7401e-04
Loss = 3.5692e-03, PNorm = 169.9853, GNorm = 0.1293, lr_0 = 2.7382e-04
Loss = 4.9076e-03, PNorm = 169.9946, GNorm = 0.3514, lr_0 = 2.7364e-04
Loss = 3.8848e-03, PNorm = 170.0058, GNorm = 0.3061, lr_0 = 2.7345e-04
Loss = 5.6214e-03, PNorm = 170.0146, GNorm = 0.1050, lr_0 = 2.7326e-04
Loss = 4.4672e-03, PNorm = 170.0247, GNorm = 0.0842, lr_0 = 2.7307e-04
Loss = 4.6275e-03, PNorm = 170.0328, GNorm = 0.1604, lr_0 = 2.7289e-04
Loss = 3.7351e-03, PNorm = 170.0400, GNorm = 0.1848, lr_0 = 2.7270e-04
Loss = 4.4677e-03, PNorm = 170.0499, GNorm = 0.1357, lr_0 = 2.7251e-04
Loss = 5.3856e-03, PNorm = 170.0610, GNorm = 0.2553, lr_0 = 2.7233e-04
Loss = 4.4773e-03, PNorm = 170.0706, GNorm = 0.0691, lr_0 = 2.7214e-04
Loss = 4.8195e-03, PNorm = 170.0806, GNorm = 0.2662, lr_0 = 2.7195e-04
Loss = 4.6889e-03, PNorm = 170.0873, GNorm = 0.4671, lr_0 = 2.7177e-04
Loss = 4.3820e-03, PNorm = 170.0950, GNorm = 0.3448, lr_0 = 2.7158e-04
Loss = 8.6710e-03, PNorm = 170.1053, GNorm = 0.1127, lr_0 = 2.7139e-04
Loss = 3.3812e-03, PNorm = 170.1139, GNorm = 0.1969, lr_0 = 2.7121e-04
Loss = 3.9631e-03, PNorm = 170.1208, GNorm = 0.2168, lr_0 = 2.7102e-04
Loss = 3.7213e-03, PNorm = 170.1285, GNorm = 0.2440, lr_0 = 2.7084e-04
Loss = 5.1652e-03, PNorm = 170.1366, GNorm = 0.2674, lr_0 = 2.7065e-04
Loss = 3.7385e-03, PNorm = 170.1493, GNorm = 0.0906, lr_0 = 2.7047e-04
Loss = 3.6186e-03, PNorm = 170.1597, GNorm = 0.1559, lr_0 = 2.7028e-04
Loss = 2.9608e-03, PNorm = 170.1680, GNorm = 0.0831, lr_0 = 2.7010e-04
Loss = 3.4821e-03, PNorm = 170.1758, GNorm = 0.1724, lr_0 = 2.6991e-04
Loss = 4.7968e-03, PNorm = 170.1857, GNorm = 0.1989, lr_0 = 2.6973e-04
Loss = 3.5772e-03, PNorm = 170.1955, GNorm = 0.0774, lr_0 = 2.6954e-04
Loss = 3.5191e-03, PNorm = 170.2043, GNorm = 0.1746, lr_0 = 2.6936e-04
Loss = 2.7974e-03, PNorm = 170.2137, GNorm = 0.0934, lr_0 = 2.6917e-04
Loss = 3.7377e-03, PNorm = 170.2208, GNorm = 0.2234, lr_0 = 2.6899e-04
Loss = 5.0450e-03, PNorm = 170.2286, GNorm = 0.1694, lr_0 = 2.6880e-04
Loss = 3.4935e-03, PNorm = 170.2366, GNorm = 0.0849, lr_0 = 2.6862e-04
Loss = 4.7390e-03, PNorm = 170.2457, GNorm = 0.3848, lr_0 = 2.6844e-04
Loss = 3.4813e-03, PNorm = 170.2555, GNorm = 0.1145, lr_0 = 2.6825e-04
Validation mae = 0.278937
Epoch 18
Loss = 3.3175e-03, PNorm = 170.2624, GNorm = 0.3359, lr_0 = 2.6807e-04
Loss = 3.6140e-03, PNorm = 170.2699, GNorm = 0.1546, lr_0 = 2.6788e-04
Loss = 3.4020e-03, PNorm = 170.2781, GNorm = 0.2529, lr_0 = 2.6770e-04
Loss = 4.3644e-03, PNorm = 170.2844, GNorm = 0.1455, lr_0 = 2.6752e-04
Loss = 5.7272e-03, PNorm = 170.2878, GNorm = 0.1551, lr_0 = 2.6733e-04
Loss = 3.2671e-03, PNorm = 170.2917, GNorm = 0.2942, lr_0 = 2.6715e-04
Loss = 3.6796e-03, PNorm = 170.2955, GNorm = 0.2145, lr_0 = 2.6697e-04
Loss = 3.3026e-03, PNorm = 170.3040, GNorm = 0.1665, lr_0 = 2.6678e-04
Loss = 3.0155e-03, PNorm = 170.3125, GNorm = 0.0902, lr_0 = 2.6660e-04
Loss = 3.3814e-03, PNorm = 170.3210, GNorm = 0.2831, lr_0 = 2.6642e-04
Loss = 2.9626e-03, PNorm = 170.3293, GNorm = 0.2513, lr_0 = 2.6624e-04
Loss = 6.2442e-03, PNorm = 170.3382, GNorm = 0.2640, lr_0 = 2.6605e-04
Loss = 3.6814e-03, PNorm = 170.3496, GNorm = 0.3528, lr_0 = 2.6587e-04
Loss = 4.2542e-03, PNorm = 170.3560, GNorm = 0.2817, lr_0 = 2.6569e-04
Loss = 3.2283e-03, PNorm = 170.3612, GNorm = 0.2616, lr_0 = 2.6551e-04
Loss = 3.8327e-03, PNorm = 170.3683, GNorm = 0.2332, lr_0 = 2.6533e-04
Loss = 4.0248e-03, PNorm = 170.3751, GNorm = 0.1808, lr_0 = 2.6514e-04
Loss = 3.1198e-03, PNorm = 170.3850, GNorm = 0.3183, lr_0 = 2.6496e-04
Loss = 4.0679e-03, PNorm = 170.3917, GNorm = 0.1053, lr_0 = 2.6478e-04
Loss = 3.2030e-03, PNorm = 170.3987, GNorm = 0.1493, lr_0 = 2.6460e-04
Loss = 2.9167e-03, PNorm = 170.4041, GNorm = 0.2446, lr_0 = 2.6442e-04
Loss = 3.8577e-03, PNorm = 170.4133, GNorm = 0.2922, lr_0 = 2.6424e-04
Loss = 3.5223e-03, PNorm = 170.4205, GNorm = 0.1726, lr_0 = 2.6406e-04
Loss = 3.3052e-03, PNorm = 170.4259, GNorm = 0.3821, lr_0 = 2.6388e-04
Loss = 3.1699e-03, PNorm = 170.4333, GNorm = 0.1598, lr_0 = 2.6369e-04
Loss = 3.3127e-03, PNorm = 170.4417, GNorm = 0.2835, lr_0 = 2.6351e-04
Loss = 4.3622e-03, PNorm = 170.4509, GNorm = 0.4699, lr_0 = 2.6333e-04
Loss = 4.1941e-03, PNorm = 170.4598, GNorm = 0.2033, lr_0 = 2.6315e-04
Loss = 3.4018e-03, PNorm = 170.4677, GNorm = 0.0678, lr_0 = 2.6297e-04
Loss = 3.4946e-03, PNorm = 170.4749, GNorm = 0.1473, lr_0 = 2.6279e-04
Loss = 3.3579e-03, PNorm = 170.4805, GNorm = 0.3195, lr_0 = 2.6261e-04
Loss = 5.5717e-03, PNorm = 170.4859, GNorm = 0.3100, lr_0 = 2.6243e-04
Loss = 3.4337e-03, PNorm = 170.4929, GNorm = 0.2640, lr_0 = 2.6225e-04
Loss = 4.8215e-03, PNorm = 170.5019, GNorm = 0.5080, lr_0 = 2.6207e-04
Loss = 5.4681e-03, PNorm = 170.5096, GNorm = 0.2089, lr_0 = 2.6189e-04
Loss = 3.9111e-03, PNorm = 170.5171, GNorm = 0.1304, lr_0 = 2.6171e-04
Loss = 7.6140e-03, PNorm = 170.5230, GNorm = 0.1906, lr_0 = 2.6153e-04
Loss = 4.3422e-03, PNorm = 170.5329, GNorm = 0.2382, lr_0 = 2.6136e-04
Loss = 4.8138e-03, PNorm = 170.5417, GNorm = 0.2607, lr_0 = 2.6118e-04
Loss = 3.6662e-03, PNorm = 170.5503, GNorm = 0.0580, lr_0 = 2.6100e-04
Loss = 3.9633e-03, PNorm = 170.5605, GNorm = 0.2651, lr_0 = 2.6082e-04
Loss = 2.8983e-03, PNorm = 170.5692, GNorm = 0.0856, lr_0 = 2.6064e-04
Loss = 3.4900e-03, PNorm = 170.5765, GNorm = 0.0939, lr_0 = 2.6046e-04
Loss = 4.1269e-03, PNorm = 170.5853, GNorm = 0.1130, lr_0 = 2.6028e-04
Loss = 2.9707e-03, PNorm = 170.5927, GNorm = 0.2318, lr_0 = 2.6011e-04
Loss = 2.6807e-03, PNorm = 170.5973, GNorm = 0.1495, lr_0 = 2.5993e-04
Loss = 3.1330e-03, PNorm = 170.6036, GNorm = 0.2062, lr_0 = 2.5975e-04
Loss = 3.6792e-03, PNorm = 170.6111, GNorm = 0.2833, lr_0 = 2.5957e-04
Loss = 3.9799e-03, PNorm = 170.6144, GNorm = 0.1034, lr_0 = 2.5939e-04
Loss = 4.1866e-03, PNorm = 170.6193, GNorm = 0.0971, lr_0 = 2.5922e-04
Loss = 3.7660e-03, PNorm = 170.6244, GNorm = 0.1615, lr_0 = 2.5904e-04
Loss = 4.6143e-03, PNorm = 170.6318, GNorm = 0.2920, lr_0 = 2.5886e-04
Loss = 2.9048e-03, PNorm = 170.6394, GNorm = 0.1818, lr_0 = 2.5868e-04
Loss = 3.7509e-03, PNorm = 170.6467, GNorm = 0.3459, lr_0 = 2.5851e-04
Loss = 3.4296e-03, PNorm = 170.6534, GNorm = 0.2413, lr_0 = 2.5833e-04
Loss = 5.0735e-03, PNorm = 170.6609, GNorm = 0.0915, lr_0 = 2.5815e-04
Loss = 3.7146e-03, PNorm = 170.6678, GNorm = 0.1860, lr_0 = 2.5797e-04
Loss = 3.3023e-03, PNorm = 170.6762, GNorm = 0.0986, lr_0 = 2.5780e-04
Loss = 4.0291e-03, PNorm = 170.6835, GNorm = 0.4185, lr_0 = 2.5762e-04
Loss = 4.1852e-03, PNorm = 170.6906, GNorm = 0.2185, lr_0 = 2.5745e-04
Loss = 3.7636e-03, PNorm = 170.6982, GNorm = 0.1144, lr_0 = 2.5727e-04
Loss = 3.9989e-03, PNorm = 170.7059, GNorm = 0.4046, lr_0 = 2.5709e-04
Loss = 4.1732e-03, PNorm = 170.7140, GNorm = 0.1763, lr_0 = 2.5692e-04
Loss = 3.6671e-03, PNorm = 170.7206, GNorm = 0.0627, lr_0 = 2.5674e-04
Loss = 3.0615e-03, PNorm = 170.7272, GNorm = 0.1801, lr_0 = 2.5656e-04
Loss = 5.8473e-03, PNorm = 170.7353, GNorm = 0.3287, lr_0 = 2.5639e-04
Loss = 4.2552e-03, PNorm = 170.7483, GNorm = 0.2057, lr_0 = 2.5621e-04
Loss = 3.3885e-03, PNorm = 170.7548, GNorm = 0.2507, lr_0 = 2.5604e-04
Loss = 3.7841e-03, PNorm = 170.7601, GNorm = 0.0808, lr_0 = 2.5586e-04
Loss = 5.5257e-03, PNorm = 170.7678, GNorm = 0.1627, lr_0 = 2.5569e-04
Loss = 3.7614e-03, PNorm = 170.7779, GNorm = 0.1803, lr_0 = 2.5551e-04
Loss = 6.3044e-03, PNorm = 170.7890, GNorm = 0.0836, lr_0 = 2.5534e-04
Loss = 3.3824e-03, PNorm = 170.7996, GNorm = 0.0709, lr_0 = 2.5516e-04
Loss = 3.7892e-03, PNorm = 170.8101, GNorm = 0.0833, lr_0 = 2.5499e-04
Loss = 4.0845e-03, PNorm = 170.8183, GNorm = 0.2635, lr_0 = 2.5481e-04
Loss = 4.3041e-03, PNorm = 170.8250, GNorm = 0.2668, lr_0 = 2.5464e-04
Loss = 5.8458e-03, PNorm = 170.8309, GNorm = 0.1504, lr_0 = 2.5446e-04
Loss = 5.3575e-03, PNorm = 170.8379, GNorm = 0.2376, lr_0 = 2.5429e-04
Loss = 5.2849e-03, PNorm = 170.8454, GNorm = 0.4013, lr_0 = 2.5411e-04
Loss = 3.8678e-03, PNorm = 170.8533, GNorm = 0.3524, lr_0 = 2.5394e-04
Loss = 3.4251e-03, PNorm = 170.8649, GNorm = 0.3440, lr_0 = 2.5377e-04
Loss = 3.8485e-03, PNorm = 170.8739, GNorm = 0.0573, lr_0 = 2.5359e-04
Loss = 3.4774e-03, PNorm = 170.8800, GNorm = 0.1935, lr_0 = 2.5342e-04
Loss = 3.5609e-03, PNorm = 170.8869, GNorm = 0.1589, lr_0 = 2.5325e-04
Loss = 3.7349e-03, PNorm = 170.8953, GNorm = 0.1875, lr_0 = 2.5307e-04
Loss = 3.0915e-03, PNorm = 170.9049, GNorm = 0.1938, lr_0 = 2.5290e-04
Loss = 3.2418e-03, PNorm = 170.9141, GNorm = 0.1381, lr_0 = 2.5273e-04
Loss = 3.0316e-03, PNorm = 170.9221, GNorm = 0.1915, lr_0 = 2.5255e-04
Loss = 4.4583e-03, PNorm = 170.9318, GNorm = 0.3715, lr_0 = 2.5238e-04
Loss = 3.2873e-03, PNorm = 170.9391, GNorm = 0.0618, lr_0 = 2.5221e-04
Loss = 2.9469e-03, PNorm = 170.9463, GNorm = 0.0733, lr_0 = 2.5203e-04
Loss = 3.9730e-03, PNorm = 170.9532, GNorm = 0.0715, lr_0 = 2.5186e-04
Loss = 3.6663e-03, PNorm = 170.9599, GNorm = 0.2963, lr_0 = 2.5169e-04
Loss = 3.3965e-03, PNorm = 170.9681, GNorm = 0.0845, lr_0 = 2.5152e-04
Loss = 3.5421e-03, PNorm = 170.9784, GNorm = 0.2225, lr_0 = 2.5134e-04
Loss = 4.8766e-03, PNorm = 170.9831, GNorm = 0.1521, lr_0 = 2.5117e-04
Loss = 4.9228e-03, PNorm = 170.9898, GNorm = 0.1839, lr_0 = 2.5100e-04
Loss = 3.2647e-03, PNorm = 170.9952, GNorm = 0.0969, lr_0 = 2.5083e-04
Loss = 4.1453e-03, PNorm = 171.0030, GNorm = 0.0853, lr_0 = 2.5066e-04
Loss = 3.7960e-03, PNorm = 171.0105, GNorm = 0.1756, lr_0 = 2.5048e-04
Loss = 4.2539e-03, PNorm = 171.0177, GNorm = 0.1755, lr_0 = 2.5031e-04
Loss = 3.3067e-03, PNorm = 171.0258, GNorm = 0.2083, lr_0 = 2.5014e-04
Loss = 5.5990e-03, PNorm = 171.0341, GNorm = 0.1036, lr_0 = 2.4997e-04
Loss = 3.9115e-03, PNorm = 171.0417, GNorm = 0.1870, lr_0 = 2.4980e-04
Loss = 3.4887e-03, PNorm = 171.0497, GNorm = 0.4670, lr_0 = 2.4963e-04
Loss = 1.0264e-02, PNorm = 171.0580, GNorm = 0.5244, lr_0 = 2.4946e-04
Loss = 4.6401e-03, PNorm = 171.0661, GNorm = 0.4839, lr_0 = 2.4929e-04
Loss = 4.1629e-03, PNorm = 171.0750, GNorm = 0.2611, lr_0 = 2.4911e-04
Loss = 4.0080e-03, PNorm = 171.0812, GNorm = 0.1955, lr_0 = 2.4894e-04
Loss = 3.6715e-03, PNorm = 171.0908, GNorm = 0.3755, lr_0 = 2.4877e-04
Loss = 6.4238e-03, PNorm = 171.1007, GNorm = 0.4552, lr_0 = 2.4860e-04
Loss = 6.0555e-03, PNorm = 171.1132, GNorm = 0.3548, lr_0 = 2.4843e-04
Loss = 5.2904e-03, PNorm = 171.1237, GNorm = 0.2035, lr_0 = 2.4826e-04
Loss = 5.5582e-03, PNorm = 171.1331, GNorm = 0.1523, lr_0 = 2.4809e-04
Loss = 3.6600e-03, PNorm = 171.1412, GNorm = 0.3724, lr_0 = 2.4792e-04
Loss = 4.0292e-03, PNorm = 171.1471, GNorm = 0.2632, lr_0 = 2.4775e-04
Loss = 4.2706e-03, PNorm = 171.1547, GNorm = 0.1631, lr_0 = 2.4758e-04
Loss = 3.8661e-03, PNorm = 171.1640, GNorm = 0.3458, lr_0 = 2.4741e-04
Loss = 4.4010e-03, PNorm = 171.1722, GNorm = 0.1628, lr_0 = 2.4724e-04
Loss = 2.9368e-03, PNorm = 171.1794, GNorm = 0.2367, lr_0 = 2.4707e-04
Validation mae = 0.279947
Epoch 19
Loss = 3.3600e-03, PNorm = 171.1873, GNorm = 0.1632, lr_0 = 2.4690e-04
Loss = 3.4626e-03, PNorm = 171.1914, GNorm = 0.3531, lr_0 = 2.4674e-04
Loss = 3.6441e-03, PNorm = 171.1968, GNorm = 0.2044, lr_0 = 2.4657e-04
Loss = 4.6461e-03, PNorm = 171.1995, GNorm = 0.1550, lr_0 = 2.4640e-04
Loss = 3.9752e-03, PNorm = 171.2077, GNorm = 0.1964, lr_0 = 2.4623e-04
Loss = 3.6428e-03, PNorm = 171.2128, GNorm = 0.4080, lr_0 = 2.4606e-04
Loss = 3.2750e-03, PNorm = 171.2206, GNorm = 0.2651, lr_0 = 2.4589e-04
Loss = 3.3190e-03, PNorm = 171.2247, GNorm = 0.0875, lr_0 = 2.4572e-04
Loss = 3.8414e-03, PNorm = 171.2301, GNorm = 0.0804, lr_0 = 2.4556e-04
Loss = 4.5661e-03, PNorm = 171.2388, GNorm = 0.0999, lr_0 = 2.4539e-04
Loss = 3.1216e-03, PNorm = 171.2463, GNorm = 0.0771, lr_0 = 2.4522e-04
Loss = 2.5520e-03, PNorm = 171.2541, GNorm = 0.1754, lr_0 = 2.4505e-04
Loss = 3.0184e-03, PNorm = 171.2590, GNorm = 0.2302, lr_0 = 2.4488e-04
Loss = 3.1371e-03, PNorm = 171.2648, GNorm = 0.1897, lr_0 = 2.4472e-04
Loss = 2.8062e-03, PNorm = 171.2679, GNorm = 0.1580, lr_0 = 2.4455e-04
Loss = 2.6925e-03, PNorm = 171.2727, GNorm = 0.1749, lr_0 = 2.4438e-04
Loss = 3.2167e-03, PNorm = 171.2787, GNorm = 0.1563, lr_0 = 2.4421e-04
Loss = 3.1399e-03, PNorm = 171.2856, GNorm = 0.0814, lr_0 = 2.4405e-04
Loss = 3.2074e-03, PNorm = 171.2917, GNorm = 0.1973, lr_0 = 2.4388e-04
Loss = 2.6668e-03, PNorm = 171.2973, GNorm = 0.1750, lr_0 = 2.4371e-04
Loss = 2.4469e-03, PNorm = 171.3029, GNorm = 0.1067, lr_0 = 2.4354e-04
Loss = 5.2827e-03, PNorm = 171.3073, GNorm = 0.2323, lr_0 = 2.4338e-04
Loss = 3.0828e-03, PNorm = 171.3122, GNorm = 0.4564, lr_0 = 2.4321e-04
Loss = 2.9278e-03, PNorm = 171.3184, GNorm = 0.2718, lr_0 = 2.4304e-04
Loss = 4.0890e-03, PNorm = 171.3252, GNorm = 0.2011, lr_0 = 2.4288e-04
Loss = 2.7687e-03, PNorm = 171.3329, GNorm = 0.3096, lr_0 = 2.4271e-04
Loss = 2.9916e-03, PNorm = 171.3370, GNorm = 0.1608, lr_0 = 2.4254e-04
Loss = 3.6626e-03, PNorm = 171.3400, GNorm = 0.1790, lr_0 = 2.4238e-04
Loss = 2.6516e-03, PNorm = 171.3441, GNorm = 0.3214, lr_0 = 2.4221e-04
Loss = 4.6099e-03, PNorm = 171.3499, GNorm = 0.2738, lr_0 = 2.4205e-04
Loss = 2.7085e-03, PNorm = 171.3534, GNorm = 0.1388, lr_0 = 2.4188e-04
Loss = 3.4440e-03, PNorm = 171.3613, GNorm = 0.2837, lr_0 = 2.4171e-04
Loss = 3.0742e-03, PNorm = 171.3705, GNorm = 0.0720, lr_0 = 2.4155e-04
Loss = 3.2666e-03, PNorm = 171.3778, GNorm = 0.0878, lr_0 = 2.4138e-04
Loss = 4.8796e-03, PNorm = 171.3844, GNorm = 0.2143, lr_0 = 2.4122e-04
Loss = 2.9408e-03, PNorm = 171.3920, GNorm = 0.2709, lr_0 = 2.4105e-04
Loss = 2.9846e-03, PNorm = 171.3992, GNorm = 0.1652, lr_0 = 2.4089e-04
Loss = 3.0560e-03, PNorm = 171.4062, GNorm = 0.1671, lr_0 = 2.4072e-04
Loss = 3.2574e-03, PNorm = 171.4109, GNorm = 0.0593, lr_0 = 2.4056e-04
Loss = 3.1162e-03, PNorm = 171.4167, GNorm = 0.3529, lr_0 = 2.4039e-04
Loss = 2.5601e-03, PNorm = 171.4231, GNorm = 0.2163, lr_0 = 2.4023e-04
Loss = 4.1768e-03, PNorm = 171.4276, GNorm = 0.2551, lr_0 = 2.4006e-04
Loss = 3.9860e-03, PNorm = 171.4329, GNorm = 0.2128, lr_0 = 2.3990e-04
Loss = 3.3136e-03, PNorm = 171.4409, GNorm = 0.1770, lr_0 = 2.3974e-04
Loss = 3.1420e-03, PNorm = 171.4473, GNorm = 0.3143, lr_0 = 2.3957e-04
Loss = 3.8408e-03, PNorm = 171.4539, GNorm = 0.1844, lr_0 = 2.3941e-04
Loss = 4.0001e-03, PNorm = 171.4576, GNorm = 0.2316, lr_0 = 2.3924e-04
Loss = 2.9759e-03, PNorm = 171.4614, GNorm = 0.2508, lr_0 = 2.3908e-04
Loss = 3.3221e-03, PNorm = 171.4690, GNorm = 0.1314, lr_0 = 2.3892e-04
Loss = 3.6302e-03, PNorm = 171.4742, GNorm = 0.2720, lr_0 = 2.3875e-04
Loss = 3.7290e-03, PNorm = 171.4819, GNorm = 0.0506, lr_0 = 2.3859e-04
Loss = 3.7266e-03, PNorm = 171.4888, GNorm = 0.0955, lr_0 = 2.3842e-04
Loss = 4.8301e-03, PNorm = 171.4954, GNorm = 0.1896, lr_0 = 2.3826e-04
Loss = 2.4727e-03, PNorm = 171.5026, GNorm = 0.0982, lr_0 = 2.3810e-04
Loss = 2.8732e-03, PNorm = 171.5080, GNorm = 0.1022, lr_0 = 2.3794e-04
Loss = 3.8950e-03, PNorm = 171.5128, GNorm = 0.0955, lr_0 = 2.3777e-04
Loss = 3.2966e-03, PNorm = 171.5177, GNorm = 0.1969, lr_0 = 2.3761e-04
Loss = 2.8467e-03, PNorm = 171.5219, GNorm = 0.2372, lr_0 = 2.3745e-04
Loss = 8.1933e-03, PNorm = 171.5263, GNorm = 0.0620, lr_0 = 2.3728e-04
Loss = 4.2668e-03, PNorm = 171.5356, GNorm = 0.6008, lr_0 = 2.3712e-04
Loss = 4.9033e-03, PNorm = 171.5454, GNorm = 0.1505, lr_0 = 2.3696e-04
Loss = 2.6675e-03, PNorm = 171.5520, GNorm = 0.2376, lr_0 = 2.3680e-04
Loss = 3.0790e-03, PNorm = 171.5580, GNorm = 0.0630, lr_0 = 2.3663e-04
Loss = 3.2763e-03, PNorm = 171.5633, GNorm = 0.3831, lr_0 = 2.3647e-04
Loss = 3.1922e-03, PNorm = 171.5710, GNorm = 0.2129, lr_0 = 2.3631e-04
Loss = 4.3527e-03, PNorm = 171.5756, GNorm = 0.1180, lr_0 = 2.3615e-04
Loss = 2.9478e-03, PNorm = 171.5821, GNorm = 0.1371, lr_0 = 2.3599e-04
Loss = 2.6080e-03, PNorm = 171.5899, GNorm = 0.1371, lr_0 = 2.3582e-04
Loss = 3.8142e-03, PNorm = 171.5967, GNorm = 0.2312, lr_0 = 2.3566e-04
Loss = 2.9584e-03, PNorm = 171.6019, GNorm = 0.1491, lr_0 = 2.3550e-04
Loss = 4.3692e-03, PNorm = 171.6072, GNorm = 0.1644, lr_0 = 2.3534e-04
Loss = 2.6622e-03, PNorm = 171.6131, GNorm = 0.1264, lr_0 = 2.3518e-04
Loss = 3.6728e-03, PNorm = 171.6184, GNorm = 0.0383, lr_0 = 2.3502e-04
Loss = 2.8001e-03, PNorm = 171.6255, GNorm = 0.1264, lr_0 = 2.3486e-04
Loss = 3.2188e-03, PNorm = 171.6319, GNorm = 0.1027, lr_0 = 2.3470e-04
Loss = 2.8887e-03, PNorm = 171.6401, GNorm = 0.1243, lr_0 = 2.3454e-04
Loss = 3.7428e-03, PNorm = 171.6476, GNorm = 0.2628, lr_0 = 2.3437e-04
Loss = 3.0175e-03, PNorm = 171.6554, GNorm = 0.3384, lr_0 = 2.3421e-04
Loss = 2.9305e-03, PNorm = 171.6610, GNorm = 0.0923, lr_0 = 2.3405e-04
Loss = 3.1928e-03, PNorm = 171.6662, GNorm = 0.2570, lr_0 = 2.3389e-04
Loss = 3.5944e-03, PNorm = 171.6723, GNorm = 0.2185, lr_0 = 2.3373e-04
Loss = 2.8530e-03, PNorm = 171.6787, GNorm = 0.1856, lr_0 = 2.3357e-04
Loss = 3.0521e-03, PNorm = 171.6868, GNorm = 0.0750, lr_0 = 2.3341e-04
Loss = 3.9442e-03, PNorm = 171.6934, GNorm = 0.1531, lr_0 = 2.3325e-04
Loss = 2.6485e-03, PNorm = 171.7010, GNorm = 0.4864, lr_0 = 2.3309e-04
Loss = 2.6708e-03, PNorm = 171.7094, GNorm = 0.1545, lr_0 = 2.3293e-04
Loss = 3.5032e-03, PNorm = 171.7159, GNorm = 0.0874, lr_0 = 2.3277e-04
Loss = 5.6626e-03, PNorm = 171.7232, GNorm = 0.0986, lr_0 = 2.3261e-04
Loss = 5.7137e-03, PNorm = 171.7275, GNorm = 0.1998, lr_0 = 2.3246e-04
Loss = 3.1237e-03, PNorm = 171.7334, GNorm = 0.1088, lr_0 = 2.3230e-04
Loss = 3.0800e-03, PNorm = 171.7413, GNorm = 0.2483, lr_0 = 2.3214e-04
Loss = 2.9836e-03, PNorm = 171.7502, GNorm = 0.4815, lr_0 = 2.3198e-04
Loss = 3.8687e-03, PNorm = 171.7580, GNorm = 0.0768, lr_0 = 2.3182e-04
Loss = 2.8274e-03, PNorm = 171.7658, GNorm = 0.1057, lr_0 = 2.3166e-04
Loss = 3.4355e-03, PNorm = 171.7735, GNorm = 0.1217, lr_0 = 2.3150e-04
Loss = 3.9267e-03, PNorm = 171.7787, GNorm = 0.0875, lr_0 = 2.3134e-04
Loss = 3.8791e-03, PNorm = 171.7823, GNorm = 0.1823, lr_0 = 2.3118e-04
Loss = 2.7767e-03, PNorm = 171.7892, GNorm = 0.1048, lr_0 = 2.3103e-04
Loss = 2.5446e-03, PNorm = 171.7977, GNorm = 0.0976, lr_0 = 2.3087e-04
Loss = 6.0776e-03, PNorm = 171.8039, GNorm = 0.1697, lr_0 = 2.3071e-04
Loss = 2.8208e-03, PNorm = 171.8107, GNorm = 0.1795, lr_0 = 2.3055e-04
Loss = 4.7203e-03, PNorm = 171.8175, GNorm = 0.1529, lr_0 = 2.3039e-04
Loss = 5.9041e-03, PNorm = 171.8271, GNorm = 0.3076, lr_0 = 2.3024e-04
Loss = 4.0120e-03, PNorm = 171.8333, GNorm = 0.2303, lr_0 = 2.3008e-04
Loss = 3.2695e-03, PNorm = 171.8421, GNorm = 0.0573, lr_0 = 2.2992e-04
Loss = 3.7617e-03, PNorm = 171.8477, GNorm = 0.2062, lr_0 = 2.2976e-04
Loss = 2.9319e-03, PNorm = 171.8526, GNorm = 0.0982, lr_0 = 2.2961e-04
Loss = 5.3843e-03, PNorm = 171.8618, GNorm = 0.1039, lr_0 = 2.2945e-04
Loss = 3.7332e-03, PNorm = 171.8700, GNorm = 0.1068, lr_0 = 2.2929e-04
Loss = 3.9558e-03, PNorm = 171.8773, GNorm = 0.1496, lr_0 = 2.2913e-04
Loss = 3.0554e-03, PNorm = 171.8804, GNorm = 0.2440, lr_0 = 2.2898e-04
Loss = 3.1276e-03, PNorm = 171.8866, GNorm = 0.4094, lr_0 = 2.2882e-04
Loss = 4.4937e-03, PNorm = 171.8957, GNorm = 0.0887, lr_0 = 2.2866e-04
Loss = 3.2130e-03, PNorm = 171.9042, GNorm = 0.1897, lr_0 = 2.2851e-04
Loss = 4.2501e-03, PNorm = 171.9141, GNorm = 0.0512, lr_0 = 2.2835e-04
Loss = 5.2148e-03, PNorm = 171.9213, GNorm = 0.3332, lr_0 = 2.2819e-04
Loss = 2.6162e-03, PNorm = 171.9265, GNorm = 0.2187, lr_0 = 2.2804e-04
Loss = 2.6230e-03, PNorm = 171.9322, GNorm = 0.0701, lr_0 = 2.2788e-04
Loss = 3.5956e-03, PNorm = 171.9372, GNorm = 0.2530, lr_0 = 2.2773e-04
Loss = 3.5019e-03, PNorm = 171.9419, GNorm = 0.1501, lr_0 = 2.2757e-04
Validation mae = 0.278690
Epoch 20
Loss = 3.9376e-03, PNorm = 171.9448, GNorm = 0.4444, lr_0 = 2.2741e-04
Loss = 3.0759e-03, PNorm = 171.9498, GNorm = 0.1479, lr_0 = 2.2726e-04
Loss = 2.4098e-03, PNorm = 171.9535, GNorm = 0.1191, lr_0 = 2.2710e-04
Loss = 2.2248e-03, PNorm = 171.9588, GNorm = 0.0553, lr_0 = 2.2695e-04
Loss = 2.9031e-03, PNorm = 171.9644, GNorm = 0.0750, lr_0 = 2.2679e-04
Loss = 2.5801e-03, PNorm = 171.9733, GNorm = 0.2111, lr_0 = 2.2664e-04
Loss = 2.4798e-03, PNorm = 171.9799, GNorm = 0.1120, lr_0 = 2.2648e-04
Loss = 3.4996e-03, PNorm = 171.9831, GNorm = 0.0993, lr_0 = 2.2632e-04
Loss = 2.1762e-03, PNorm = 171.9852, GNorm = 0.1642, lr_0 = 2.2617e-04
Loss = 4.4632e-03, PNorm = 171.9889, GNorm = 0.2274, lr_0 = 2.2601e-04
Loss = 3.7750e-03, PNorm = 171.9938, GNorm = 0.3948, lr_0 = 2.2586e-04
Loss = 3.4884e-03, PNorm = 171.9998, GNorm = 0.0580, lr_0 = 2.2571e-04
Loss = 3.6687e-03, PNorm = 172.0039, GNorm = 0.2436, lr_0 = 2.2555e-04
Loss = 4.1807e-03, PNorm = 172.0098, GNorm = 0.0770, lr_0 = 2.2540e-04
Loss = 2.9510e-03, PNorm = 172.0167, GNorm = 0.0730, lr_0 = 2.2524e-04
Loss = 3.3155e-03, PNorm = 172.0211, GNorm = 0.1548, lr_0 = 2.2509e-04
Loss = 2.7398e-03, PNorm = 172.0267, GNorm = 0.1302, lr_0 = 2.2493e-04
Loss = 3.0352e-03, PNorm = 172.0326, GNorm = 0.1051, lr_0 = 2.2478e-04
Loss = 2.7032e-03, PNorm = 172.0395, GNorm = 0.1288, lr_0 = 2.2463e-04
Loss = 5.1720e-03, PNorm = 172.0448, GNorm = 0.0688, lr_0 = 2.2447e-04
Loss = 3.0257e-03, PNorm = 172.0520, GNorm = 0.1208, lr_0 = 2.2432e-04
Loss = 4.5046e-03, PNorm = 172.0573, GNorm = 0.1777, lr_0 = 2.2416e-04
Loss = 5.1327e-03, PNorm = 172.0668, GNorm = 0.1583, lr_0 = 2.2401e-04
Loss = 2.5225e-03, PNorm = 172.0738, GNorm = 0.1263, lr_0 = 2.2386e-04
Loss = 4.6920e-03, PNorm = 172.0773, GNorm = 0.0846, lr_0 = 2.2370e-04
Loss = 2.2402e-03, PNorm = 172.0820, GNorm = 0.1018, lr_0 = 2.2355e-04
Loss = 2.6540e-03, PNorm = 172.0858, GNorm = 0.3890, lr_0 = 2.2340e-04
Loss = 3.6811e-03, PNorm = 172.0900, GNorm = 0.1296, lr_0 = 2.2324e-04
Loss = 2.4364e-03, PNorm = 172.0953, GNorm = 0.1480, lr_0 = 2.2309e-04
Loss = 3.3834e-03, PNorm = 172.1013, GNorm = 0.0543, lr_0 = 2.2294e-04
Loss = 3.4523e-03, PNorm = 172.1058, GNorm = 0.2387, lr_0 = 2.2279e-04
Loss = 3.0831e-03, PNorm = 172.1111, GNorm = 0.1500, lr_0 = 2.2263e-04
Loss = 3.4326e-03, PNorm = 172.1139, GNorm = 0.1728, lr_0 = 2.2248e-04
Loss = 2.7885e-03, PNorm = 172.1180, GNorm = 0.3631, lr_0 = 2.2233e-04
Loss = 3.8475e-03, PNorm = 172.1210, GNorm = 0.3844, lr_0 = 2.2218e-04
Loss = 3.5681e-03, PNorm = 172.1263, GNorm = 0.0630, lr_0 = 2.2202e-04
Loss = 2.9554e-03, PNorm = 172.1318, GNorm = 0.1371, lr_0 = 2.2187e-04
Loss = 2.1152e-03, PNorm = 172.1376, GNorm = 0.1829, lr_0 = 2.2172e-04
Loss = 3.0477e-03, PNorm = 172.1430, GNorm = 0.0450, lr_0 = 2.2157e-04
Loss = 2.3252e-03, PNorm = 172.1490, GNorm = 0.2341, lr_0 = 2.2142e-04
Loss = 2.4132e-03, PNorm = 172.1537, GNorm = 0.0783, lr_0 = 2.2126e-04
Loss = 2.7465e-03, PNorm = 172.1579, GNorm = 0.2220, lr_0 = 2.2111e-04
Loss = 2.5982e-03, PNorm = 172.1635, GNorm = 0.1300, lr_0 = 2.2096e-04
Loss = 2.4635e-03, PNorm = 172.1687, GNorm = 0.2857, lr_0 = 2.2081e-04
Loss = 2.6407e-03, PNorm = 172.1747, GNorm = 0.0770, lr_0 = 2.2066e-04
Loss = 2.3680e-03, PNorm = 172.1780, GNorm = 0.1002, lr_0 = 2.2051e-04
Loss = 3.2920e-03, PNorm = 172.1812, GNorm = 0.1864, lr_0 = 2.2036e-04
Loss = 2.2880e-03, PNorm = 172.1837, GNorm = 0.1278, lr_0 = 2.2021e-04
Loss = 2.2961e-03, PNorm = 172.1872, GNorm = 0.1733, lr_0 = 2.2005e-04
Loss = 2.7067e-03, PNorm = 172.1919, GNorm = 0.2934, lr_0 = 2.1990e-04
Loss = 2.8890e-03, PNorm = 172.1957, GNorm = 0.2045, lr_0 = 2.1975e-04
Loss = 4.7750e-03, PNorm = 172.1979, GNorm = 0.3487, lr_0 = 2.1960e-04
Loss = 2.4765e-03, PNorm = 172.2027, GNorm = 0.1075, lr_0 = 2.1945e-04
Loss = 2.4430e-03, PNorm = 172.2105, GNorm = 0.2341, lr_0 = 2.1930e-04
Loss = 2.2025e-03, PNorm = 172.2199, GNorm = 0.1201, lr_0 = 2.1915e-04
Loss = 3.0651e-03, PNorm = 172.2260, GNorm = 0.0716, lr_0 = 2.1900e-04
Loss = 2.9043e-03, PNorm = 172.2318, GNorm = 0.1278, lr_0 = 2.1885e-04
Loss = 2.1556e-03, PNorm = 172.2370, GNorm = 0.0901, lr_0 = 2.1870e-04
Loss = 2.4040e-03, PNorm = 172.2411, GNorm = 0.2279, lr_0 = 2.1855e-04
Loss = 2.9071e-03, PNorm = 172.2439, GNorm = 0.1662, lr_0 = 2.1840e-04
Loss = 3.5565e-03, PNorm = 172.2474, GNorm = 0.0748, lr_0 = 2.1825e-04
Loss = 2.2937e-03, PNorm = 172.2530, GNorm = 0.1934, lr_0 = 2.1810e-04
Loss = 3.6501e-03, PNorm = 172.2589, GNorm = 0.1046, lr_0 = 2.1795e-04
Loss = 2.3506e-03, PNorm = 172.2648, GNorm = 0.2045, lr_0 = 2.1780e-04
Loss = 4.2546e-03, PNorm = 172.2702, GNorm = 0.1172, lr_0 = 2.1765e-04
Loss = 2.0772e-03, PNorm = 172.2765, GNorm = 0.1439, lr_0 = 2.1751e-04
Loss = 4.8614e-03, PNorm = 172.2822, GNorm = 0.2962, lr_0 = 2.1736e-04
Loss = 2.9155e-03, PNorm = 172.2875, GNorm = 0.1784, lr_0 = 2.1721e-04
Loss = 2.4503e-03, PNorm = 172.2930, GNorm = 0.3766, lr_0 = 2.1706e-04
Loss = 2.8120e-03, PNorm = 172.2974, GNorm = 0.1446, lr_0 = 2.1691e-04
Loss = 2.3359e-03, PNorm = 172.3034, GNorm = 0.1271, lr_0 = 2.1676e-04
Loss = 2.3952e-03, PNorm = 172.3099, GNorm = 0.1134, lr_0 = 2.1661e-04
Loss = 3.5641e-03, PNorm = 172.3171, GNorm = 0.1352, lr_0 = 2.1646e-04
Loss = 2.7786e-03, PNorm = 172.3226, GNorm = 0.3387, lr_0 = 2.1632e-04
Loss = 4.0928e-03, PNorm = 172.3274, GNorm = 0.5017, lr_0 = 2.1617e-04
Loss = 4.2011e-03, PNorm = 172.3300, GNorm = 0.0903, lr_0 = 2.1602e-04
Loss = 5.4505e-03, PNorm = 172.3364, GNorm = 0.3827, lr_0 = 2.1587e-04
Loss = 2.1111e-03, PNorm = 172.3436, GNorm = 0.0956, lr_0 = 2.1572e-04
Loss = 2.2812e-03, PNorm = 172.3508, GNorm = 0.0945, lr_0 = 2.1558e-04
Loss = 2.3254e-03, PNorm = 172.3574, GNorm = 0.0866, lr_0 = 2.1543e-04
Loss = 3.1002e-03, PNorm = 172.3603, GNorm = 0.0927, lr_0 = 2.1528e-04
Loss = 2.5948e-03, PNorm = 172.3619, GNorm = 0.0479, lr_0 = 2.1513e-04
Loss = 2.3068e-03, PNorm = 172.3660, GNorm = 0.0788, lr_0 = 2.1499e-04
Loss = 5.0265e-03, PNorm = 172.3689, GNorm = 0.3492, lr_0 = 2.1484e-04
Loss = 2.6280e-03, PNorm = 172.3748, GNorm = 0.1724, lr_0 = 2.1469e-04
Loss = 2.8052e-03, PNorm = 172.3795, GNorm = 0.5214, lr_0 = 2.1454e-04
Loss = 4.0524e-03, PNorm = 172.3869, GNorm = 0.3042, lr_0 = 2.1440e-04
Loss = 3.0174e-03, PNorm = 172.3940, GNorm = 0.0875, lr_0 = 2.1425e-04
Loss = 3.0929e-03, PNorm = 172.4023, GNorm = 0.1292, lr_0 = 2.1410e-04
Loss = 2.9204e-03, PNorm = 172.4084, GNorm = 0.0937, lr_0 = 2.1396e-04
Loss = 4.5772e-03, PNorm = 172.4132, GNorm = 0.1664, lr_0 = 2.1381e-04
Loss = 3.0311e-03, PNorm = 172.4175, GNorm = 0.1935, lr_0 = 2.1366e-04
Loss = 2.5533e-03, PNorm = 172.4207, GNorm = 0.1116, lr_0 = 2.1352e-04
Loss = 3.5354e-03, PNorm = 172.4257, GNorm = 0.0511, lr_0 = 2.1337e-04
Loss = 3.6656e-03, PNorm = 172.4319, GNorm = 0.1697, lr_0 = 2.1323e-04
Loss = 2.2437e-03, PNorm = 172.4402, GNorm = 0.0549, lr_0 = 2.1308e-04
Loss = 3.4824e-03, PNorm = 172.4467, GNorm = 0.1973, lr_0 = 2.1293e-04
Loss = 2.1775e-03, PNorm = 172.4549, GNorm = 0.2008, lr_0 = 2.1279e-04
Loss = 3.3186e-03, PNorm = 172.4595, GNorm = 0.1954, lr_0 = 2.1264e-04
Loss = 4.2309e-03, PNorm = 172.4657, GNorm = 0.2094, lr_0 = 2.1250e-04
Loss = 3.1928e-03, PNorm = 172.4697, GNorm = 0.1590, lr_0 = 2.1235e-04
Loss = 2.4082e-03, PNorm = 172.4762, GNorm = 0.1315, lr_0 = 2.1221e-04
Loss = 3.6721e-03, PNorm = 172.4830, GNorm = 0.0637, lr_0 = 2.1206e-04
Loss = 4.7470e-03, PNorm = 172.4881, GNorm = 0.3742, lr_0 = 2.1191e-04
Loss = 2.8819e-03, PNorm = 172.4934, GNorm = 0.0938, lr_0 = 2.1177e-04
Loss = 2.5201e-03, PNorm = 172.4991, GNorm = 0.1197, lr_0 = 2.1162e-04
Loss = 2.7433e-03, PNorm = 172.5040, GNorm = 0.2361, lr_0 = 2.1148e-04
Loss = 3.2960e-03, PNorm = 172.5123, GNorm = 0.2387, lr_0 = 2.1133e-04
Loss = 3.9132e-03, PNorm = 172.5205, GNorm = 0.3149, lr_0 = 2.1119e-04
Loss = 2.8858e-03, PNorm = 172.5247, GNorm = 0.1397, lr_0 = 2.1104e-04
Loss = 2.9458e-03, PNorm = 172.5286, GNorm = 0.2602, lr_0 = 2.1090e-04
Loss = 2.7705e-03, PNorm = 172.5340, GNorm = 0.1224, lr_0 = 2.1076e-04
Loss = 2.6267e-03, PNorm = 172.5393, GNorm = 0.4871, lr_0 = 2.1061e-04
Loss = 3.5398e-03, PNorm = 172.5449, GNorm = 0.2080, lr_0 = 2.1047e-04
Loss = 3.4054e-03, PNorm = 172.5512, GNorm = 0.2785, lr_0 = 2.1032e-04
Loss = 2.7930e-03, PNorm = 172.5574, GNorm = 0.0767, lr_0 = 2.1018e-04
Loss = 3.4892e-03, PNorm = 172.5656, GNorm = 0.1455, lr_0 = 2.1003e-04
Loss = 2.3391e-03, PNorm = 172.5723, GNorm = 0.0363, lr_0 = 2.0989e-04
Loss = 2.6273e-03, PNorm = 172.5778, GNorm = 0.0863, lr_0 = 2.0975e-04
Loss = 3.9188e-03, PNorm = 172.5846, GNorm = 0.3355, lr_0 = 2.0960e-04
Validation mae = 0.278950
Epoch 21
Loss = 2.1657e-03, PNorm = 172.5900, GNorm = 0.2040, lr_0 = 2.0946e-04
Loss = 2.5851e-03, PNorm = 172.5920, GNorm = 0.0767, lr_0 = 2.0932e-04
Loss = 2.3089e-03, PNorm = 172.5945, GNorm = 0.1066, lr_0 = 2.0917e-04
Loss = 1.9686e-03, PNorm = 172.5978, GNorm = 0.1047, lr_0 = 2.0903e-04
Loss = 2.7557e-03, PNorm = 172.6008, GNorm = 0.2461, lr_0 = 2.0889e-04
Loss = 2.9581e-03, PNorm = 172.6045, GNorm = 0.2296, lr_0 = 2.0874e-04
Loss = 2.1587e-03, PNorm = 172.6090, GNorm = 0.1845, lr_0 = 2.0860e-04
Loss = 2.2197e-03, PNorm = 172.6139, GNorm = 0.3539, lr_0 = 2.0846e-04
Loss = 2.7291e-03, PNorm = 172.6186, GNorm = 0.2685, lr_0 = 2.0831e-04
Loss = 1.7624e-03, PNorm = 172.6223, GNorm = 0.0442, lr_0 = 2.0817e-04
Loss = 2.1847e-03, PNorm = 172.6270, GNorm = 0.1141, lr_0 = 2.0803e-04
Loss = 3.4852e-03, PNorm = 172.6285, GNorm = 0.0848, lr_0 = 2.0789e-04
Loss = 2.3925e-03, PNorm = 172.6310, GNorm = 0.0603, lr_0 = 2.0774e-04
Loss = 1.9481e-03, PNorm = 172.6347, GNorm = 0.1386, lr_0 = 2.0760e-04
Loss = 2.0626e-03, PNorm = 172.6383, GNorm = 0.0904, lr_0 = 2.0746e-04
Loss = 2.1686e-03, PNorm = 172.6410, GNorm = 0.1030, lr_0 = 2.0732e-04
Loss = 2.3434e-03, PNorm = 172.6442, GNorm = 0.1995, lr_0 = 2.0718e-04
Loss = 2.3835e-03, PNorm = 172.6489, GNorm = 0.2266, lr_0 = 2.0703e-04
Loss = 1.5858e-03, PNorm = 172.6528, GNorm = 0.1994, lr_0 = 2.0689e-04
Loss = 2.8793e-03, PNorm = 172.6557, GNorm = 0.1009, lr_0 = 2.0675e-04
Loss = 3.4524e-03, PNorm = 172.6585, GNorm = 0.0986, lr_0 = 2.0661e-04
Loss = 3.6581e-03, PNorm = 172.6637, GNorm = 0.1294, lr_0 = 2.0647e-04
Loss = 2.5895e-03, PNorm = 172.6690, GNorm = 0.1011, lr_0 = 2.0633e-04
Loss = 1.9815e-03, PNorm = 172.6734, GNorm = 0.0969, lr_0 = 2.0618e-04
Loss = 1.6996e-03, PNorm = 172.6774, GNorm = 0.1479, lr_0 = 2.0604e-04
Loss = 1.9675e-03, PNorm = 172.6823, GNorm = 0.1538, lr_0 = 2.0590e-04
Loss = 2.6481e-03, PNorm = 172.6861, GNorm = 0.1269, lr_0 = 2.0576e-04
Loss = 2.8494e-03, PNorm = 172.6905, GNorm = 0.0393, lr_0 = 2.0562e-04
Loss = 2.3860e-03, PNorm = 172.6951, GNorm = 0.1582, lr_0 = 2.0548e-04
Loss = 1.9882e-03, PNorm = 172.6986, GNorm = 0.1730, lr_0 = 2.0534e-04
Loss = 2.4547e-03, PNorm = 172.6998, GNorm = 0.1443, lr_0 = 2.0520e-04
Loss = 2.7050e-03, PNorm = 172.7038, GNorm = 0.1270, lr_0 = 2.0506e-04
Loss = 3.9290e-03, PNorm = 172.7085, GNorm = 0.1272, lr_0 = 2.0492e-04
Loss = 2.1502e-03, PNorm = 172.7141, GNorm = 0.2158, lr_0 = 2.0478e-04
Loss = 2.1974e-03, PNorm = 172.7193, GNorm = 0.1731, lr_0 = 2.0464e-04
Loss = 2.6652e-03, PNorm = 172.7237, GNorm = 0.3438, lr_0 = 2.0450e-04
Loss = 2.4249e-03, PNorm = 172.7257, GNorm = 0.1209, lr_0 = 2.0436e-04
Loss = 2.9219e-03, PNorm = 172.7300, GNorm = 0.0613, lr_0 = 2.0422e-04
Loss = 2.3034e-03, PNorm = 172.7343, GNorm = 0.1132, lr_0 = 2.0408e-04
Loss = 3.3622e-03, PNorm = 172.7385, GNorm = 0.2793, lr_0 = 2.0394e-04
Loss = 2.5831e-03, PNorm = 172.7441, GNorm = 0.2059, lr_0 = 2.0380e-04
Loss = 3.4999e-03, PNorm = 172.7466, GNorm = 0.1378, lr_0 = 2.0366e-04
Loss = 2.7862e-03, PNorm = 172.7513, GNorm = 0.2218, lr_0 = 2.0352e-04
Loss = 2.0459e-03, PNorm = 172.7574, GNorm = 0.0894, lr_0 = 2.0338e-04
Loss = 1.9135e-03, PNorm = 172.7635, GNorm = 0.1310, lr_0 = 2.0324e-04
Loss = 1.8698e-03, PNorm = 172.7698, GNorm = 0.0727, lr_0 = 2.0310e-04
Loss = 2.4115e-03, PNorm = 172.7751, GNorm = 0.0767, lr_0 = 2.0296e-04
Loss = 4.2669e-03, PNorm = 172.7785, GNorm = 0.4328, lr_0 = 2.0282e-04
Loss = 2.9905e-03, PNorm = 172.7819, GNorm = 0.1920, lr_0 = 2.0268e-04
Loss = 2.0280e-03, PNorm = 172.7852, GNorm = 0.1415, lr_0 = 2.0254e-04
Loss = 1.7718e-03, PNorm = 172.7901, GNorm = 0.1562, lr_0 = 2.0240e-04
Loss = 3.0145e-03, PNorm = 172.7936, GNorm = 0.0498, lr_0 = 2.0227e-04
Loss = 6.7078e-03, PNorm = 172.7985, GNorm = 0.1478, lr_0 = 2.0213e-04
Loss = 3.0312e-03, PNorm = 172.8040, GNorm = 0.0864, lr_0 = 2.0199e-04
Loss = 2.3172e-03, PNorm = 172.8094, GNorm = 0.0694, lr_0 = 2.0185e-04
Loss = 2.7443e-03, PNorm = 172.8140, GNorm = 0.1018, lr_0 = 2.0171e-04
Loss = 3.4100e-03, PNorm = 172.8198, GNorm = 0.1147, lr_0 = 2.0157e-04
Loss = 2.4051e-03, PNorm = 172.8278, GNorm = 0.0900, lr_0 = 2.0144e-04
Loss = 3.6878e-03, PNorm = 172.8323, GNorm = 0.0918, lr_0 = 2.0130e-04
Loss = 1.7895e-03, PNorm = 172.8348, GNorm = 0.0716, lr_0 = 2.0116e-04
Loss = 2.5347e-03, PNorm = 172.8355, GNorm = 0.0941, lr_0 = 2.0102e-04
Loss = 2.5325e-03, PNorm = 172.8367, GNorm = 0.0839, lr_0 = 2.0088e-04
Loss = 2.4077e-03, PNorm = 172.8417, GNorm = 0.1291, lr_0 = 2.0075e-04
Loss = 1.9357e-03, PNorm = 172.8474, GNorm = 0.1594, lr_0 = 2.0061e-04
Loss = 2.0494e-03, PNorm = 172.8524, GNorm = 0.1952, lr_0 = 2.0047e-04
Loss = 2.8562e-03, PNorm = 172.8557, GNorm = 0.1927, lr_0 = 2.0033e-04
Loss = 1.6732e-03, PNorm = 172.8591, GNorm = 0.0897, lr_0 = 2.0020e-04
Loss = 2.3241e-03, PNorm = 172.8622, GNorm = 0.0829, lr_0 = 2.0006e-04
Loss = 1.8909e-03, PNorm = 172.8656, GNorm = 0.0925, lr_0 = 1.9992e-04
Loss = 3.3312e-03, PNorm = 172.8696, GNorm = 0.3054, lr_0 = 1.9979e-04
Loss = 3.1762e-03, PNorm = 172.8749, GNorm = 0.0785, lr_0 = 1.9965e-04
Loss = 1.7765e-03, PNorm = 172.8814, GNorm = 0.1730, lr_0 = 1.9951e-04
Loss = 6.6010e-03, PNorm = 172.8848, GNorm = 0.0620, lr_0 = 1.9938e-04
Loss = 4.7219e-03, PNorm = 172.8890, GNorm = 0.1985, lr_0 = 1.9924e-04
Loss = 1.8073e-03, PNorm = 172.8923, GNorm = 0.1735, lr_0 = 1.9910e-04
Loss = 1.9651e-03, PNorm = 172.8980, GNorm = 0.1257, lr_0 = 1.9897e-04
Loss = 2.9952e-03, PNorm = 172.9054, GNorm = 0.1082, lr_0 = 1.9883e-04
Loss = 2.5125e-03, PNorm = 172.9113, GNorm = 0.1107, lr_0 = 1.9869e-04
Loss = 3.5378e-03, PNorm = 172.9139, GNorm = 0.1118, lr_0 = 1.9856e-04
Loss = 2.3876e-03, PNorm = 172.9169, GNorm = 0.2410, lr_0 = 1.9842e-04
Loss = 3.0834e-03, PNorm = 172.9183, GNorm = 0.0931, lr_0 = 1.9829e-04
Loss = 2.1363e-03, PNorm = 172.9220, GNorm = 0.1863, lr_0 = 1.9815e-04
Loss = 2.2950e-03, PNorm = 172.9276, GNorm = 0.0749, lr_0 = 1.9801e-04
Loss = 2.6676e-03, PNorm = 172.9334, GNorm = 0.0740, lr_0 = 1.9788e-04
Loss = 2.2579e-03, PNorm = 172.9386, GNorm = 0.1865, lr_0 = 1.9774e-04
Loss = 1.8744e-03, PNorm = 172.9452, GNorm = 0.2253, lr_0 = 1.9761e-04
Loss = 5.9135e-03, PNorm = 172.9495, GNorm = 0.0882, lr_0 = 1.9747e-04
Loss = 1.9772e-03, PNorm = 172.9530, GNorm = 0.1194, lr_0 = 1.9734e-04
Loss = 2.3269e-03, PNorm = 172.9551, GNorm = 0.1152, lr_0 = 1.9720e-04
Loss = 2.1680e-03, PNorm = 172.9589, GNorm = 0.1320, lr_0 = 1.9707e-04
Loss = 3.1250e-03, PNorm = 172.9646, GNorm = 0.1647, lr_0 = 1.9693e-04
Loss = 3.5406e-03, PNorm = 172.9704, GNorm = 0.0887, lr_0 = 1.9680e-04
Loss = 1.7706e-03, PNorm = 172.9763, GNorm = 0.1343, lr_0 = 1.9666e-04
Loss = 3.3603e-03, PNorm = 172.9805, GNorm = 0.2061, lr_0 = 1.9653e-04
Loss = 2.3875e-03, PNorm = 172.9862, GNorm = 0.1648, lr_0 = 1.9639e-04
Loss = 2.4622e-03, PNorm = 172.9910, GNorm = 0.1740, lr_0 = 1.9626e-04
Loss = 2.1717e-03, PNorm = 172.9946, GNorm = 0.0612, lr_0 = 1.9612e-04
Loss = 1.9408e-03, PNorm = 172.9989, GNorm = 0.1109, lr_0 = 1.9599e-04
Loss = 2.1259e-03, PNorm = 173.0032, GNorm = 0.1852, lr_0 = 1.9585e-04
Loss = 2.1811e-03, PNorm = 173.0064, GNorm = 0.1523, lr_0 = 1.9572e-04
Loss = 2.3923e-03, PNorm = 173.0098, GNorm = 0.1736, lr_0 = 1.9559e-04
Loss = 2.7666e-03, PNorm = 173.0131, GNorm = 0.2792, lr_0 = 1.9545e-04
Loss = 2.5653e-03, PNorm = 173.0171, GNorm = 0.3208, lr_0 = 1.9532e-04
Loss = 2.0853e-03, PNorm = 173.0225, GNorm = 0.4410, lr_0 = 1.9518e-04
Loss = 2.9237e-03, PNorm = 173.0281, GNorm = 0.1417, lr_0 = 1.9505e-04
Loss = 1.8227e-03, PNorm = 173.0348, GNorm = 0.0553, lr_0 = 1.9492e-04
Loss = 2.4906e-03, PNorm = 173.0398, GNorm = 0.3181, lr_0 = 1.9478e-04
Loss = 1.9743e-03, PNorm = 173.0433, GNorm = 0.0609, lr_0 = 1.9465e-04
Loss = 2.6950e-03, PNorm = 173.0447, GNorm = 0.1971, lr_0 = 1.9452e-04
Loss = 1.7692e-03, PNorm = 173.0482, GNorm = 0.0718, lr_0 = 1.9438e-04
Loss = 4.1780e-03, PNorm = 173.0543, GNorm = 0.1830, lr_0 = 1.9425e-04
Loss = 1.9877e-03, PNorm = 173.0593, GNorm = 0.0414, lr_0 = 1.9412e-04
Loss = 2.6251e-03, PNorm = 173.0640, GNorm = 0.2171, lr_0 = 1.9398e-04
Loss = 3.8865e-03, PNorm = 173.0696, GNorm = 0.3253, lr_0 = 1.9385e-04
Loss = 3.2500e-03, PNorm = 173.0736, GNorm = 0.0874, lr_0 = 1.9372e-04
Loss = 2.4976e-03, PNorm = 173.0779, GNorm = 0.1531, lr_0 = 1.9359e-04
Loss = 2.3862e-03, PNorm = 173.0829, GNorm = 0.2449, lr_0 = 1.9345e-04
Loss = 7.7442e-03, PNorm = 173.0847, GNorm = 0.1082, lr_0 = 1.9332e-04
Loss = 2.0299e-03, PNorm = 173.0894, GNorm = 0.0819, lr_0 = 1.9319e-04
Loss = 5.6849e-03, PNorm = 173.0954, GNorm = 0.2474, lr_0 = 1.9306e-04
Validation mae = 0.278745
Epoch 22
Loss = 1.7112e-03, PNorm = 173.1018, GNorm = 0.3608, lr_0 = 1.9292e-04
Loss = 1.6690e-03, PNorm = 173.1064, GNorm = 0.1185, lr_0 = 1.9279e-04
Loss = 2.5544e-03, PNorm = 173.1106, GNorm = 0.0899, lr_0 = 1.9266e-04
Loss = 1.6564e-03, PNorm = 173.1152, GNorm = 0.1267, lr_0 = 1.9253e-04
Loss = 2.3130e-03, PNorm = 173.1183, GNorm = 0.0703, lr_0 = 1.9240e-04
Loss = 2.5918e-03, PNorm = 173.1228, GNorm = 0.1098, lr_0 = 1.9226e-04
Loss = 2.0634e-03, PNorm = 173.1256, GNorm = 0.0958, lr_0 = 1.9213e-04
Loss = 1.6527e-03, PNorm = 173.1266, GNorm = 0.0378, lr_0 = 1.9200e-04
Loss = 2.5505e-03, PNorm = 173.1298, GNorm = 0.1965, lr_0 = 1.9187e-04
Loss = 2.2455e-03, PNorm = 173.1326, GNorm = 0.2589, lr_0 = 1.9174e-04
Loss = 1.7263e-03, PNorm = 173.1354, GNorm = 0.1351, lr_0 = 1.9161e-04
Loss = 2.4449e-03, PNorm = 173.1401, GNorm = 0.1655, lr_0 = 1.9148e-04
Loss = 1.4828e-03, PNorm = 173.1426, GNorm = 0.2487, lr_0 = 1.9134e-04
Loss = 1.6782e-03, PNorm = 173.1472, GNorm = 0.0391, lr_0 = 1.9121e-04
Loss = 2.7879e-03, PNorm = 173.1499, GNorm = 0.1637, lr_0 = 1.9108e-04
Loss = 2.0826e-03, PNorm = 173.1525, GNorm = 0.0827, lr_0 = 1.9095e-04
Loss = 4.3899e-03, PNorm = 173.1553, GNorm = 0.0683, lr_0 = 1.9082e-04
Loss = 1.7810e-03, PNorm = 173.1594, GNorm = 0.0590, lr_0 = 1.9069e-04
Loss = 1.6434e-03, PNorm = 173.1616, GNorm = 0.2820, lr_0 = 1.9056e-04
Loss = 1.7959e-03, PNorm = 173.1634, GNorm = 0.0692, lr_0 = 1.9043e-04
Loss = 2.0028e-03, PNorm = 173.1672, GNorm = 0.3928, lr_0 = 1.9030e-04
Loss = 3.0145e-03, PNorm = 173.1723, GNorm = 0.0979, lr_0 = 1.9017e-04
Loss = 2.3628e-03, PNorm = 173.1771, GNorm = 0.1960, lr_0 = 1.9004e-04
Loss = 2.1096e-03, PNorm = 173.1798, GNorm = 0.0942, lr_0 = 1.8991e-04
Loss = 2.4336e-03, PNorm = 173.1830, GNorm = 0.0938, lr_0 = 1.8978e-04
Loss = 2.1528e-03, PNorm = 173.1860, GNorm = 0.3305, lr_0 = 1.8965e-04
Loss = 2.1850e-03, PNorm = 173.1906, GNorm = 0.1796, lr_0 = 1.8952e-04
Loss = 2.7906e-03, PNorm = 173.1960, GNorm = 0.3692, lr_0 = 1.8939e-04
Loss = 3.4860e-03, PNorm = 173.1993, GNorm = 0.1229, lr_0 = 1.8926e-04
Loss = 2.8795e-03, PNorm = 173.2040, GNorm = 0.2135, lr_0 = 1.8913e-04
Loss = 2.8006e-03, PNorm = 173.2065, GNorm = 0.1811, lr_0 = 1.8900e-04
Loss = 3.4422e-03, PNorm = 173.2102, GNorm = 0.2581, lr_0 = 1.8887e-04
Loss = 1.5525e-03, PNorm = 173.2131, GNorm = 0.0841, lr_0 = 1.8874e-04
Loss = 2.3663e-03, PNorm = 173.2177, GNorm = 0.1034, lr_0 = 1.8861e-04
Loss = 3.0609e-03, PNorm = 173.2222, GNorm = 0.1053, lr_0 = 1.8848e-04
Loss = 3.2886e-03, PNorm = 173.2261, GNorm = 0.0930, lr_0 = 1.8835e-04
Loss = 1.9810e-03, PNorm = 173.2317, GNorm = 0.0532, lr_0 = 1.8822e-04
Loss = 1.6899e-03, PNorm = 173.2360, GNorm = 0.2219, lr_0 = 1.8809e-04
Loss = 2.1340e-03, PNorm = 173.2411, GNorm = 0.1266, lr_0 = 1.8797e-04
Loss = 1.9101e-03, PNorm = 173.2444, GNorm = 0.1048, lr_0 = 1.8784e-04
Loss = 2.2460e-03, PNorm = 173.2489, GNorm = 0.0582, lr_0 = 1.8771e-04
Loss = 2.6441e-03, PNorm = 173.2529, GNorm = 0.1241, lr_0 = 1.8758e-04
Loss = 4.0882e-03, PNorm = 173.2572, GNorm = 0.0664, lr_0 = 1.8745e-04
Loss = 2.3034e-03, PNorm = 173.2609, GNorm = 0.1112, lr_0 = 1.8732e-04
Loss = 2.1210e-03, PNorm = 173.2632, GNorm = 0.2476, lr_0 = 1.8719e-04
Loss = 5.0063e-03, PNorm = 173.2679, GNorm = 0.0462, lr_0 = 1.8707e-04
Loss = 1.5522e-03, PNorm = 173.2713, GNorm = 0.0439, lr_0 = 1.8694e-04
Loss = 2.9000e-03, PNorm = 173.2770, GNorm = 0.2191, lr_0 = 1.8681e-04
Loss = 2.1115e-03, PNorm = 173.2826, GNorm = 0.3093, lr_0 = 1.8668e-04
Loss = 2.0794e-03, PNorm = 173.2878, GNorm = 0.2116, lr_0 = 1.8655e-04
Loss = 2.5659e-03, PNorm = 173.2915, GNorm = 0.0768, lr_0 = 1.8643e-04
Loss = 1.5526e-03, PNorm = 173.2927, GNorm = 0.0552, lr_0 = 1.8630e-04
Loss = 1.7235e-03, PNorm = 173.2953, GNorm = 0.0705, lr_0 = 1.8617e-04
Loss = 1.7373e-03, PNorm = 173.2992, GNorm = 0.1090, lr_0 = 1.8604e-04
Loss = 1.4820e-03, PNorm = 173.3024, GNorm = 0.0648, lr_0 = 1.8592e-04
Loss = 2.0634e-03, PNorm = 173.3057, GNorm = 0.3529, lr_0 = 1.8579e-04
Loss = 3.8068e-03, PNorm = 173.3094, GNorm = 1.0043, lr_0 = 1.8566e-04
Loss = 1.6912e-03, PNorm = 173.3144, GNorm = 0.0909, lr_0 = 1.8553e-04
Loss = 1.7766e-03, PNorm = 173.3194, GNorm = 0.2415, lr_0 = 1.8541e-04
Loss = 2.8157e-03, PNorm = 173.3236, GNorm = 0.1781, lr_0 = 1.8528e-04
Loss = 1.7887e-03, PNorm = 173.3277, GNorm = 0.1786, lr_0 = 1.8515e-04
Loss = 2.1807e-03, PNorm = 173.3325, GNorm = 0.3379, lr_0 = 1.8503e-04
Loss = 2.5781e-03, PNorm = 173.3352, GNorm = 0.0593, lr_0 = 1.8490e-04
Loss = 2.2129e-03, PNorm = 173.3390, GNorm = 0.0491, lr_0 = 1.8477e-04
Loss = 1.6878e-03, PNorm = 173.3432, GNorm = 0.1222, lr_0 = 1.8465e-04
Loss = 2.8294e-03, PNorm = 173.3457, GNorm = 0.2761, lr_0 = 1.8452e-04
Loss = 4.5136e-03, PNorm = 173.3492, GNorm = 0.0758, lr_0 = 1.8439e-04
Loss = 2.4268e-03, PNorm = 173.3528, GNorm = 0.4245, lr_0 = 1.8427e-04
Loss = 2.6782e-03, PNorm = 173.3563, GNorm = 0.0869, lr_0 = 1.8414e-04
Loss = 1.8418e-03, PNorm = 173.3598, GNorm = 0.1069, lr_0 = 1.8401e-04
Loss = 7.1991e-03, PNorm = 173.3624, GNorm = 0.2735, lr_0 = 1.8389e-04
Loss = 2.6299e-03, PNorm = 173.3643, GNorm = 0.2341, lr_0 = 1.8376e-04
Loss = 1.8095e-03, PNorm = 173.3673, GNorm = 0.1071, lr_0 = 1.8364e-04
Loss = 1.5137e-03, PNorm = 173.3698, GNorm = 0.0697, lr_0 = 1.8351e-04
Loss = 1.5006e-03, PNorm = 173.3735, GNorm = 0.1266, lr_0 = 1.8338e-04
Loss = 2.4260e-03, PNorm = 173.3760, GNorm = 0.0715, lr_0 = 1.8326e-04
Loss = 4.9042e-03, PNorm = 173.3787, GNorm = 0.1197, lr_0 = 1.8313e-04
Loss = 1.8858e-03, PNorm = 173.3810, GNorm = 0.3528, lr_0 = 1.8301e-04
Loss = 2.8982e-03, PNorm = 173.3832, GNorm = 0.2018, lr_0 = 1.8288e-04
Loss = 1.9856e-03, PNorm = 173.3874, GNorm = 0.0590, lr_0 = 1.8276e-04
Loss = 1.7552e-03, PNorm = 173.3923, GNorm = 0.0683, lr_0 = 1.8263e-04
Loss = 2.1502e-03, PNorm = 173.3956, GNorm = 0.1619, lr_0 = 1.8251e-04
Loss = 1.8301e-03, PNorm = 173.4001, GNorm = 0.1832, lr_0 = 1.8238e-04
Loss = 4.4625e-03, PNorm = 173.4041, GNorm = 0.1509, lr_0 = 1.8226e-04
Loss = 6.4742e-03, PNorm = 173.4076, GNorm = 0.0974, lr_0 = 1.8213e-04
Loss = 2.3995e-03, PNorm = 173.4121, GNorm = 0.1933, lr_0 = 1.8201e-04
Loss = 1.6283e-03, PNorm = 173.4159, GNorm = 0.2838, lr_0 = 1.8188e-04
Loss = 1.8773e-03, PNorm = 173.4212, GNorm = 0.0798, lr_0 = 1.8176e-04
Loss = 2.7665e-03, PNorm = 173.4255, GNorm = 0.2310, lr_0 = 1.8163e-04
Loss = 1.8385e-03, PNorm = 173.4297, GNorm = 0.1518, lr_0 = 1.8151e-04
Loss = 3.0614e-03, PNorm = 173.4341, GNorm = 0.1848, lr_0 = 1.8138e-04
Loss = 2.3101e-03, PNorm = 173.4385, GNorm = 0.1958, lr_0 = 1.8126e-04
Loss = 1.6459e-03, PNorm = 173.4430, GNorm = 0.2217, lr_0 = 1.8114e-04
Loss = 2.5705e-03, PNorm = 173.4475, GNorm = 0.0965, lr_0 = 1.8101e-04
Loss = 1.9063e-03, PNorm = 173.4514, GNorm = 0.0872, lr_0 = 1.8089e-04
Loss = 1.8599e-03, PNorm = 173.4566, GNorm = 0.1007, lr_0 = 1.8076e-04
Loss = 1.8828e-03, PNorm = 173.4611, GNorm = 0.0804, lr_0 = 1.8064e-04
Loss = 1.7933e-03, PNorm = 173.4661, GNorm = 0.1066, lr_0 = 1.8052e-04
Loss = 3.3637e-03, PNorm = 173.4698, GNorm = 0.2163, lr_0 = 1.8039e-04
Loss = 2.0012e-03, PNorm = 173.4745, GNorm = 0.0804, lr_0 = 1.8027e-04
Loss = 2.8753e-03, PNorm = 173.4792, GNorm = 0.1054, lr_0 = 1.8015e-04
Loss = 2.3025e-03, PNorm = 173.4836, GNorm = 0.1597, lr_0 = 1.8002e-04
Loss = 3.3802e-03, PNorm = 173.4879, GNorm = 0.1616, lr_0 = 1.7990e-04
Loss = 2.0583e-03, PNorm = 173.4913, GNorm = 0.0807, lr_0 = 1.7978e-04
Loss = 1.9051e-03, PNorm = 173.4927, GNorm = 0.1054, lr_0 = 1.7965e-04
Loss = 1.6459e-03, PNorm = 173.4976, GNorm = 0.1715, lr_0 = 1.7953e-04
Loss = 2.1212e-03, PNorm = 173.5009, GNorm = 0.0677, lr_0 = 1.7941e-04
Loss = 1.6836e-03, PNorm = 173.5043, GNorm = 0.2093, lr_0 = 1.7928e-04
Loss = 1.6712e-03, PNorm = 173.5073, GNorm = 0.0659, lr_0 = 1.7916e-04
Loss = 2.2576e-03, PNorm = 173.5095, GNorm = 0.1795, lr_0 = 1.7904e-04
Loss = 2.3840e-03, PNorm = 173.5130, GNorm = 0.1537, lr_0 = 1.7892e-04
Loss = 2.2020e-03, PNorm = 173.5177, GNorm = 0.1327, lr_0 = 1.7879e-04
Loss = 2.0253e-03, PNorm = 173.5215, GNorm = 0.2254, lr_0 = 1.7867e-04
Loss = 1.3349e-03, PNorm = 173.5261, GNorm = 0.0986, lr_0 = 1.7855e-04
Loss = 1.6222e-03, PNorm = 173.5310, GNorm = 0.1303, lr_0 = 1.7843e-04
Loss = 2.0278e-03, PNorm = 173.5363, GNorm = 0.1370, lr_0 = 1.7830e-04
Loss = 3.0115e-03, PNorm = 173.5407, GNorm = 0.1092, lr_0 = 1.7818e-04
Loss = 1.8704e-03, PNorm = 173.5444, GNorm = 0.0412, lr_0 = 1.7806e-04
Loss = 2.3397e-03, PNorm = 173.5470, GNorm = 0.1246, lr_0 = 1.7794e-04
Loss = 2.0969e-03, PNorm = 173.5505, GNorm = 0.0665, lr_0 = 1.7782e-04
Validation mae = 0.278423
Epoch 23
Loss = 1.8178e-03, PNorm = 173.5536, GNorm = 0.0940, lr_0 = 1.7769e-04
Loss = 1.5606e-03, PNorm = 173.5549, GNorm = 0.0993, lr_0 = 1.7757e-04
Loss = 1.2912e-03, PNorm = 173.5564, GNorm = 0.1483, lr_0 = 1.7745e-04
Loss = 1.4741e-03, PNorm = 173.5588, GNorm = 0.2960, lr_0 = 1.7733e-04
Loss = 3.0995e-03, PNorm = 173.5618, GNorm = 0.1946, lr_0 = 1.7721e-04
Loss = 2.4937e-03, PNorm = 173.5650, GNorm = 0.1501, lr_0 = 1.7709e-04
Loss = 1.7764e-03, PNorm = 173.5679, GNorm = 0.1445, lr_0 = 1.7696e-04
Loss = 2.5920e-03, PNorm = 173.5705, GNorm = 0.0959, lr_0 = 1.7684e-04
Loss = 2.6111e-03, PNorm = 173.5730, GNorm = 0.3583, lr_0 = 1.7672e-04
Loss = 1.8977e-03, PNorm = 173.5737, GNorm = 0.0683, lr_0 = 1.7660e-04
Loss = 1.5941e-03, PNorm = 173.5779, GNorm = 0.1956, lr_0 = 1.7648e-04
Loss = 1.4732e-03, PNorm = 173.5816, GNorm = 0.0598, lr_0 = 1.7636e-04
Loss = 2.3090e-03, PNorm = 173.5848, GNorm = 0.2261, lr_0 = 1.7624e-04
Loss = 1.2485e-03, PNorm = 173.5881, GNorm = 0.1015, lr_0 = 1.7612e-04
Loss = 1.2110e-03, PNorm = 173.5915, GNorm = 0.0311, lr_0 = 1.7600e-04
Loss = 2.1636e-03, PNorm = 173.5951, GNorm = 0.1206, lr_0 = 1.7588e-04
Loss = 1.8354e-03, PNorm = 173.5987, GNorm = 0.1380, lr_0 = 1.7576e-04
Loss = 4.0990e-03, PNorm = 173.5993, GNorm = 0.1905, lr_0 = 1.7564e-04
Loss = 3.9005e-03, PNorm = 173.6022, GNorm = 0.1715, lr_0 = 1.7552e-04
Loss = 1.6573e-03, PNorm = 173.6039, GNorm = 0.0612, lr_0 = 1.7540e-04
Loss = 2.6495e-03, PNorm = 173.6068, GNorm = 0.1570, lr_0 = 1.7528e-04
Loss = 2.0060e-03, PNorm = 173.6096, GNorm = 0.1097, lr_0 = 1.7516e-04
Loss = 2.0360e-03, PNorm = 173.6129, GNorm = 0.0568, lr_0 = 1.7504e-04
Loss = 2.6092e-03, PNorm = 173.6139, GNorm = 0.0991, lr_0 = 1.7492e-04
Loss = 2.9110e-03, PNorm = 173.6169, GNorm = 0.2388, lr_0 = 1.7480e-04
Loss = 2.5656e-03, PNorm = 173.6189, GNorm = 0.1035, lr_0 = 1.7468e-04
Loss = 2.4794e-03, PNorm = 173.6213, GNorm = 0.1620, lr_0 = 1.7456e-04
Loss = 2.9559e-03, PNorm = 173.6233, GNorm = 0.2616, lr_0 = 1.7444e-04
Loss = 3.7889e-03, PNorm = 173.6260, GNorm = 0.1649, lr_0 = 1.7432e-04
Loss = 1.4139e-03, PNorm = 173.6299, GNorm = 0.1550, lr_0 = 1.7420e-04
Loss = 1.8600e-03, PNorm = 173.6348, GNorm = 0.2810, lr_0 = 1.7408e-04
Loss = 2.2636e-03, PNorm = 173.6376, GNorm = 0.0952, lr_0 = 1.7396e-04
Loss = 1.6423e-03, PNorm = 173.6405, GNorm = 0.1727, lr_0 = 1.7384e-04
Loss = 1.1319e-03, PNorm = 173.6431, GNorm = 0.0656, lr_0 = 1.7372e-04
Loss = 1.2996e-03, PNorm = 173.6447, GNorm = 0.1272, lr_0 = 1.7360e-04
Loss = 2.9362e-03, PNorm = 173.6450, GNorm = 0.1800, lr_0 = 1.7348e-04
Loss = 1.3541e-03, PNorm = 173.6478, GNorm = 0.0360, lr_0 = 1.7336e-04
Loss = 1.5993e-03, PNorm = 173.6500, GNorm = 0.1475, lr_0 = 1.7325e-04
Loss = 1.4195e-03, PNorm = 173.6528, GNorm = 0.2155, lr_0 = 1.7313e-04
Loss = 1.6307e-03, PNorm = 173.6560, GNorm = 0.1297, lr_0 = 1.7301e-04
Loss = 1.3123e-03, PNorm = 173.6584, GNorm = 0.0810, lr_0 = 1.7289e-04
Loss = 1.8150e-03, PNorm = 173.6598, GNorm = 0.3094, lr_0 = 1.7277e-04
Loss = 1.6796e-03, PNorm = 173.6625, GNorm = 0.0729, lr_0 = 1.7265e-04
Loss = 1.3591e-03, PNorm = 173.6631, GNorm = 0.3275, lr_0 = 1.7253e-04
Loss = 1.4153e-03, PNorm = 173.6656, GNorm = 0.3388, lr_0 = 1.7242e-04
Loss = 3.4185e-03, PNorm = 173.6677, GNorm = 0.0416, lr_0 = 1.7230e-04
Loss = 1.4309e-03, PNorm = 173.6725, GNorm = 0.1249, lr_0 = 1.7218e-04
Loss = 1.4338e-03, PNorm = 173.6766, GNorm = 0.0777, lr_0 = 1.7206e-04
Loss = 1.7839e-03, PNorm = 173.6809, GNorm = 0.0442, lr_0 = 1.7194e-04
Loss = 1.4541e-03, PNorm = 173.6839, GNorm = 0.1121, lr_0 = 1.7183e-04
Loss = 2.6403e-03, PNorm = 173.6868, GNorm = 0.1864, lr_0 = 1.7171e-04
Loss = 3.6637e-03, PNorm = 173.6896, GNorm = 0.0369, lr_0 = 1.7159e-04
Loss = 4.8552e-03, PNorm = 173.6920, GNorm = 0.1179, lr_0 = 1.7147e-04
Loss = 2.9948e-03, PNorm = 173.6963, GNorm = 0.1888, lr_0 = 1.7136e-04
Loss = 1.2609e-03, PNorm = 173.6986, GNorm = 0.1097, lr_0 = 1.7124e-04
Loss = 2.2561e-03, PNorm = 173.7010, GNorm = 0.0698, lr_0 = 1.7112e-04
Loss = 1.4402e-03, PNorm = 173.7048, GNorm = 0.0598, lr_0 = 1.7100e-04
Loss = 2.0481e-03, PNorm = 173.7063, GNorm = 0.2041, lr_0 = 1.7089e-04
Loss = 2.8569e-03, PNorm = 173.7098, GNorm = 0.4650, lr_0 = 1.7077e-04
Loss = 2.8665e-03, PNorm = 173.7132, GNorm = 0.4982, lr_0 = 1.7065e-04
Loss = 1.4191e-03, PNorm = 173.7195, GNorm = 0.1108, lr_0 = 1.7054e-04
Loss = 1.5156e-03, PNorm = 173.7232, GNorm = 0.1317, lr_0 = 1.7042e-04
Loss = 1.9665e-03, PNorm = 173.7259, GNorm = 0.1779, lr_0 = 1.7030e-04
Loss = 2.0154e-03, PNorm = 173.7305, GNorm = 0.1218, lr_0 = 1.7019e-04
Loss = 4.1443e-03, PNorm = 173.7359, GNorm = 0.2749, lr_0 = 1.7007e-04
Loss = 1.6954e-03, PNorm = 173.7407, GNorm = 0.1677, lr_0 = 1.6995e-04
Loss = 2.3515e-03, PNorm = 173.7432, GNorm = 0.1752, lr_0 = 1.6984e-04
Loss = 2.0303e-03, PNorm = 173.7452, GNorm = 0.0557, lr_0 = 1.6972e-04
Loss = 1.6403e-03, PNorm = 173.7489, GNorm = 0.0543, lr_0 = 1.6960e-04
Loss = 2.1176e-03, PNorm = 173.7519, GNorm = 0.1102, lr_0 = 1.6949e-04
Loss = 1.4921e-03, PNorm = 173.7550, GNorm = 0.0776, lr_0 = 1.6937e-04
Loss = 1.3053e-03, PNorm = 173.7577, GNorm = 0.0747, lr_0 = 1.6926e-04
Loss = 3.3739e-03, PNorm = 173.7600, GNorm = 0.0968, lr_0 = 1.6914e-04
Loss = 1.4705e-03, PNorm = 173.7628, GNorm = 0.1334, lr_0 = 1.6902e-04
Loss = 1.6890e-03, PNorm = 173.7647, GNorm = 0.1146, lr_0 = 1.6891e-04
Loss = 1.3095e-03, PNorm = 173.7668, GNorm = 0.1708, lr_0 = 1.6879e-04
Loss = 3.2258e-03, PNorm = 173.7707, GNorm = 0.2376, lr_0 = 1.6868e-04
Loss = 1.8056e-03, PNorm = 173.7740, GNorm = 0.1719, lr_0 = 1.6856e-04
Loss = 1.7290e-03, PNorm = 173.7767, GNorm = 0.1461, lr_0 = 1.6845e-04
Loss = 1.1146e-03, PNorm = 173.7801, GNorm = 0.1612, lr_0 = 1.6833e-04
Loss = 2.4664e-03, PNorm = 173.7832, GNorm = 0.0549, lr_0 = 1.6821e-04
Loss = 3.7351e-03, PNorm = 173.7903, GNorm = 0.2304, lr_0 = 1.6810e-04
Loss = 1.5959e-03, PNorm = 173.7959, GNorm = 0.1641, lr_0 = 1.6798e-04
Loss = 2.9886e-03, PNorm = 173.7982, GNorm = 0.1530, lr_0 = 1.6787e-04
Loss = 1.7573e-03, PNorm = 173.8000, GNorm = 0.0531, lr_0 = 1.6775e-04
Loss = 3.5127e-03, PNorm = 173.8032, GNorm = 0.3551, lr_0 = 1.6764e-04
Loss = 2.1842e-03, PNorm = 173.8071, GNorm = 0.0616, lr_0 = 1.6752e-04
Loss = 1.7432e-03, PNorm = 173.8109, GNorm = 0.3803, lr_0 = 1.6741e-04
Loss = 1.9872e-03, PNorm = 173.8165, GNorm = 0.2726, lr_0 = 1.6729e-04
Loss = 1.3979e-03, PNorm = 173.8208, GNorm = 0.0391, lr_0 = 1.6718e-04
Loss = 1.5924e-03, PNorm = 173.8241, GNorm = 0.0616, lr_0 = 1.6707e-04
Loss = 2.5090e-03, PNorm = 173.8276, GNorm = 0.5085, lr_0 = 1.6695e-04
Loss = 3.9727e-03, PNorm = 173.8295, GNorm = 0.2266, lr_0 = 1.6684e-04
Loss = 2.7470e-03, PNorm = 173.8340, GNorm = 0.1593, lr_0 = 1.6672e-04
Loss = 1.8220e-03, PNorm = 173.8356, GNorm = 0.1048, lr_0 = 1.6661e-04
Loss = 1.9205e-03, PNorm = 173.8369, GNorm = 0.1571, lr_0 = 1.6649e-04
Loss = 1.7156e-03, PNorm = 173.8414, GNorm = 0.1198, lr_0 = 1.6638e-04
Loss = 2.0955e-03, PNorm = 173.8458, GNorm = 0.1977, lr_0 = 1.6627e-04
Loss = 2.9874e-03, PNorm = 173.8510, GNorm = 0.1796, lr_0 = 1.6615e-04
Loss = 2.5363e-03, PNorm = 173.8548, GNorm = 0.1456, lr_0 = 1.6604e-04
Loss = 2.0122e-03, PNorm = 173.8559, GNorm = 0.4758, lr_0 = 1.6592e-04
Loss = 1.9673e-03, PNorm = 173.8579, GNorm = 0.2690, lr_0 = 1.6581e-04
Loss = 1.9793e-03, PNorm = 173.8615, GNorm = 0.1173, lr_0 = 1.6570e-04
Loss = 3.4195e-03, PNorm = 173.8646, GNorm = 0.2054, lr_0 = 1.6558e-04
Loss = 2.1195e-03, PNorm = 173.8698, GNorm = 0.1865, lr_0 = 1.6547e-04
Loss = 2.4196e-03, PNorm = 173.8742, GNorm = 0.1325, lr_0 = 1.6536e-04
Loss = 1.9336e-03, PNorm = 173.8759, GNorm = 0.2180, lr_0 = 1.6524e-04
Loss = 2.0131e-03, PNorm = 173.8786, GNorm = 0.1582, lr_0 = 1.6513e-04
Loss = 1.3600e-03, PNorm = 173.8813, GNorm = 0.0856, lr_0 = 1.6502e-04
Loss = 3.1690e-03, PNorm = 173.8873, GNorm = 0.1584, lr_0 = 1.6490e-04
Loss = 1.6331e-03, PNorm = 173.8907, GNorm = 0.0777, lr_0 = 1.6479e-04
Loss = 1.6472e-03, PNorm = 173.8946, GNorm = 0.0441, lr_0 = 1.6468e-04
Loss = 1.4633e-03, PNorm = 173.8982, GNorm = 0.2069, lr_0 = 1.6457e-04
Loss = 1.8414e-03, PNorm = 173.9026, GNorm = 0.0382, lr_0 = 1.6445e-04
Loss = 1.2257e-03, PNorm = 173.9048, GNorm = 0.1218, lr_0 = 1.6434e-04
Loss = 1.5937e-03, PNorm = 173.9072, GNorm = 0.2116, lr_0 = 1.6423e-04
Loss = 1.8825e-03, PNorm = 173.9104, GNorm = 0.2283, lr_0 = 1.6412e-04
Loss = 2.4206e-03, PNorm = 173.9123, GNorm = 0.1118, lr_0 = 1.6400e-04
Loss = 2.6969e-03, PNorm = 173.9155, GNorm = 0.2070, lr_0 = 1.6389e-04
Loss = 2.5220e-03, PNorm = 173.9182, GNorm = 0.2880, lr_0 = 1.6378e-04
Validation mae = 0.278564
Epoch 24
Loss = 1.5063e-03, PNorm = 173.9225, GNorm = 0.1333, lr_0 = 1.6367e-04
Loss = 1.6264e-03, PNorm = 173.9244, GNorm = 0.1016, lr_0 = 1.6355e-04
Loss = 1.8866e-03, PNorm = 173.9248, GNorm = 0.1579, lr_0 = 1.6344e-04
Loss = 1.1704e-03, PNorm = 173.9251, GNorm = 0.0575, lr_0 = 1.6333e-04
Loss = 1.5175e-03, PNorm = 173.9270, GNorm = 0.3027, lr_0 = 1.6322e-04
Loss = 1.3981e-03, PNorm = 173.9285, GNorm = 0.0765, lr_0 = 1.6311e-04
Loss = 1.5178e-03, PNorm = 173.9331, GNorm = 0.1337, lr_0 = 1.6299e-04
Loss = 1.1611e-03, PNorm = 173.9366, GNorm = 0.1465, lr_0 = 1.6288e-04
Loss = 2.2329e-03, PNorm = 173.9397, GNorm = 0.1217, lr_0 = 1.6277e-04
Loss = 2.5540e-03, PNorm = 173.9430, GNorm = 0.0840, lr_0 = 1.6266e-04
Loss = 1.2648e-03, PNorm = 173.9449, GNorm = 0.1592, lr_0 = 1.6255e-04
Loss = 1.6887e-03, PNorm = 173.9468, GNorm = 0.1660, lr_0 = 1.6244e-04
Loss = 1.5602e-03, PNorm = 173.9490, GNorm = 0.1310, lr_0 = 1.6233e-04
Loss = 1.8013e-03, PNorm = 173.9498, GNorm = 0.0964, lr_0 = 1.6221e-04
Loss = 1.5811e-03, PNorm = 173.9532, GNorm = 0.2281, lr_0 = 1.6210e-04
Loss = 1.5530e-03, PNorm = 173.9549, GNorm = 0.1893, lr_0 = 1.6199e-04
Loss = 2.9431e-03, PNorm = 173.9580, GNorm = 0.1391, lr_0 = 1.6188e-04
Loss = 2.3849e-03, PNorm = 173.9581, GNorm = 0.1971, lr_0 = 1.6177e-04
Loss = 1.1549e-03, PNorm = 173.9606, GNorm = 0.1920, lr_0 = 1.6166e-04
Loss = 2.0457e-03, PNorm = 173.9618, GNorm = 0.1019, lr_0 = 1.6155e-04
Loss = 1.5668e-03, PNorm = 173.9658, GNorm = 0.2490, lr_0 = 1.6144e-04
Loss = 1.8433e-03, PNorm = 173.9687, GNorm = 0.0981, lr_0 = 1.6133e-04
Loss = 2.5617e-03, PNorm = 173.9732, GNorm = 0.1632, lr_0 = 1.6122e-04
Loss = 1.3012e-03, PNorm = 173.9747, GNorm = 0.1726, lr_0 = 1.6111e-04
Loss = 1.2142e-03, PNorm = 173.9777, GNorm = 0.1043, lr_0 = 1.6100e-04
Loss = 2.1735e-03, PNorm = 173.9774, GNorm = 0.2557, lr_0 = 1.6089e-04
Loss = 2.1012e-03, PNorm = 173.9801, GNorm = 0.1246, lr_0 = 1.6078e-04
Loss = 2.1285e-03, PNorm = 173.9831, GNorm = 0.0566, lr_0 = 1.6067e-04
Loss = 1.4443e-03, PNorm = 173.9854, GNorm = 0.1563, lr_0 = 1.6056e-04
Loss = 1.4736e-03, PNorm = 173.9878, GNorm = 0.1007, lr_0 = 1.6045e-04
Loss = 1.6154e-03, PNorm = 173.9900, GNorm = 0.0385, lr_0 = 1.6034e-04
Loss = 1.5467e-03, PNorm = 173.9927, GNorm = 0.1629, lr_0 = 1.6023e-04
Loss = 1.4083e-03, PNorm = 173.9956, GNorm = 0.0584, lr_0 = 1.6012e-04
Loss = 1.4234e-03, PNorm = 174.0002, GNorm = 0.1175, lr_0 = 1.6001e-04
Loss = 1.3518e-03, PNorm = 174.0044, GNorm = 0.0813, lr_0 = 1.5990e-04
Loss = 4.8734e-03, PNorm = 174.0060, GNorm = 0.4347, lr_0 = 1.5979e-04
Loss = 1.3688e-03, PNorm = 174.0087, GNorm = 0.1275, lr_0 = 1.5968e-04
Loss = 2.4702e-03, PNorm = 174.0120, GNorm = 0.1280, lr_0 = 1.5957e-04
Loss = 2.0716e-03, PNorm = 174.0136, GNorm = 0.0903, lr_0 = 1.5946e-04
Loss = 1.3862e-03, PNorm = 174.0149, GNorm = 0.1430, lr_0 = 1.5935e-04
Loss = 1.5786e-03, PNorm = 174.0176, GNorm = 0.2480, lr_0 = 1.5924e-04
Loss = 1.4586e-03, PNorm = 174.0219, GNorm = 0.0919, lr_0 = 1.5913e-04
Loss = 1.4611e-03, PNorm = 174.0249, GNorm = 0.0780, lr_0 = 1.5902e-04
Loss = 1.9617e-03, PNorm = 174.0286, GNorm = 0.1350, lr_0 = 1.5891e-04
Loss = 1.8586e-03, PNorm = 174.0316, GNorm = 0.0793, lr_0 = 1.5880e-04
Loss = 2.1274e-03, PNorm = 174.0350, GNorm = 0.1388, lr_0 = 1.5870e-04
Loss = 1.7364e-03, PNorm = 174.0379, GNorm = 0.2370, lr_0 = 1.5859e-04
Loss = 2.0725e-03, PNorm = 174.0410, GNorm = 0.2489, lr_0 = 1.5848e-04
Loss = 2.9321e-03, PNorm = 174.0420, GNorm = 0.2178, lr_0 = 1.5837e-04
Loss = 1.9349e-03, PNorm = 174.0452, GNorm = 0.4850, lr_0 = 1.5826e-04
Loss = 1.3331e-03, PNorm = 174.0470, GNorm = 0.1375, lr_0 = 1.5815e-04
Loss = 1.7329e-03, PNorm = 174.0507, GNorm = 0.1352, lr_0 = 1.5804e-04
Loss = 1.6079e-03, PNorm = 174.0540, GNorm = 0.0684, lr_0 = 1.5794e-04
Loss = 1.5860e-03, PNorm = 174.0584, GNorm = 0.1439, lr_0 = 1.5783e-04
Loss = 1.2891e-03, PNorm = 174.0618, GNorm = 0.1823, lr_0 = 1.5772e-04
Loss = 1.4934e-03, PNorm = 174.0657, GNorm = 0.0826, lr_0 = 1.5761e-04
Loss = 1.2895e-03, PNorm = 174.0693, GNorm = 0.1027, lr_0 = 1.5750e-04
Loss = 2.0188e-03, PNorm = 174.0726, GNorm = 0.2111, lr_0 = 1.5740e-04
Loss = 1.1804e-03, PNorm = 174.0740, GNorm = 0.1273, lr_0 = 1.5729e-04
Loss = 1.0009e-03, PNorm = 174.0774, GNorm = 0.1115, lr_0 = 1.5718e-04
Loss = 1.5353e-03, PNorm = 174.0806, GNorm = 0.1362, lr_0 = 1.5707e-04
Loss = 1.1498e-03, PNorm = 174.0838, GNorm = 0.3399, lr_0 = 1.5697e-04
Loss = 2.6069e-03, PNorm = 174.0882, GNorm = 0.2614, lr_0 = 1.5686e-04
Loss = 1.3255e-03, PNorm = 174.0923, GNorm = 0.0378, lr_0 = 1.5675e-04
Loss = 2.5937e-03, PNorm = 174.0956, GNorm = 0.1505, lr_0 = 1.5664e-04
Loss = 1.7655e-03, PNorm = 174.0962, GNorm = 0.0903, lr_0 = 1.5654e-04
Loss = 2.3481e-03, PNorm = 174.0961, GNorm = 0.0596, lr_0 = 1.5643e-04
Loss = 2.4756e-03, PNorm = 174.0975, GNorm = 0.0412, lr_0 = 1.5632e-04
Loss = 1.2512e-03, PNorm = 174.1014, GNorm = 0.2545, lr_0 = 1.5621e-04
Loss = 2.7238e-03, PNorm = 174.1055, GNorm = 0.8599, lr_0 = 1.5611e-04
Loss = 2.8107e-03, PNorm = 174.1107, GNorm = 0.1985, lr_0 = 1.5600e-04
Loss = 1.4270e-03, PNorm = 174.1140, GNorm = 0.0792, lr_0 = 1.5589e-04
Loss = 2.0935e-03, PNorm = 174.1175, GNorm = 0.0426, lr_0 = 1.5579e-04
Loss = 2.7140e-03, PNorm = 174.1191, GNorm = 0.1933, lr_0 = 1.5568e-04
Loss = 2.9356e-03, PNorm = 174.1215, GNorm = 0.0557, lr_0 = 1.5557e-04
Loss = 1.3876e-03, PNorm = 174.1255, GNorm = 0.1847, lr_0 = 1.5547e-04
Loss = 2.6115e-03, PNorm = 174.1290, GNorm = 0.1467, lr_0 = 1.5536e-04
Loss = 3.0280e-03, PNorm = 174.1314, GNorm = 0.1585, lr_0 = 1.5525e-04
Loss = 1.9460e-03, PNorm = 174.1341, GNorm = 0.2464, lr_0 = 1.5515e-04
Loss = 1.5928e-03, PNorm = 174.1367, GNorm = 0.1445, lr_0 = 1.5504e-04
Loss = 1.5462e-03, PNorm = 174.1371, GNorm = 0.2983, lr_0 = 1.5493e-04
Loss = 1.2033e-03, PNorm = 174.1383, GNorm = 0.0776, lr_0 = 1.5483e-04
Loss = 3.1793e-03, PNorm = 174.1397, GNorm = 0.1199, lr_0 = 1.5472e-04
Loss = 2.0823e-03, PNorm = 174.1417, GNorm = 0.2265, lr_0 = 1.5462e-04
Loss = 1.2227e-03, PNorm = 174.1448, GNorm = 0.2099, lr_0 = 1.5451e-04
Loss = 1.4972e-03, PNorm = 174.1480, GNorm = 0.1547, lr_0 = 1.5440e-04
Loss = 2.3418e-03, PNorm = 174.1499, GNorm = 0.3036, lr_0 = 1.5430e-04
Loss = 1.5994e-03, PNorm = 174.1522, GNorm = 0.0994, lr_0 = 1.5419e-04
Loss = 1.6025e-03, PNorm = 174.1543, GNorm = 0.0473, lr_0 = 1.5409e-04
Loss = 3.7958e-03, PNorm = 174.1577, GNorm = 0.0698, lr_0 = 1.5398e-04
Loss = 2.6662e-03, PNorm = 174.1640, GNorm = 0.2854, lr_0 = 1.5388e-04
Loss = 1.7648e-03, PNorm = 174.1681, GNorm = 0.1257, lr_0 = 1.5377e-04
Loss = 1.3034e-03, PNorm = 174.1711, GNorm = 0.0446, lr_0 = 1.5367e-04
Loss = 1.3935e-03, PNorm = 174.1719, GNorm = 0.1041, lr_0 = 1.5356e-04
Loss = 4.9429e-03, PNorm = 174.1716, GNorm = 0.1086, lr_0 = 1.5346e-04
Loss = 1.0038e-03, PNorm = 174.1733, GNorm = 0.0887, lr_0 = 1.5335e-04
Loss = 1.1159e-03, PNorm = 174.1758, GNorm = 0.0310, lr_0 = 1.5325e-04
Loss = 1.7575e-03, PNorm = 174.1799, GNorm = 0.1230, lr_0 = 1.5314e-04
Loss = 1.8231e-03, PNorm = 174.1818, GNorm = 0.1366, lr_0 = 1.5304e-04
Loss = 2.7635e-03, PNorm = 174.1885, GNorm = 0.1269, lr_0 = 1.5293e-04
Loss = 2.6258e-03, PNorm = 174.1916, GNorm = 0.2164, lr_0 = 1.5283e-04
Loss = 1.8600e-03, PNorm = 174.1955, GNorm = 0.0732, lr_0 = 1.5272e-04
Loss = 1.1579e-03, PNorm = 174.1979, GNorm = 0.0701, lr_0 = 1.5262e-04
Loss = 3.9401e-03, PNorm = 174.2009, GNorm = 0.1097, lr_0 = 1.5251e-04
Loss = 1.6076e-03, PNorm = 174.2051, GNorm = 0.0770, lr_0 = 1.5241e-04
Loss = 1.3104e-03, PNorm = 174.2074, GNorm = 0.0522, lr_0 = 1.5230e-04
Loss = 2.5632e-03, PNorm = 174.2083, GNorm = 0.2171, lr_0 = 1.5220e-04
Loss = 3.1836e-03, PNorm = 174.2101, GNorm = 0.0733, lr_0 = 1.5209e-04
Loss = 4.3043e-03, PNorm = 174.2130, GNorm = 0.0703, lr_0 = 1.5199e-04
Loss = 2.1595e-03, PNorm = 174.2153, GNorm = 0.0823, lr_0 = 1.5189e-04
Loss = 2.0230e-03, PNorm = 174.2167, GNorm = 0.1758, lr_0 = 1.5178e-04
Loss = 3.0398e-03, PNorm = 174.2205, GNorm = 0.0783, lr_0 = 1.5168e-04
Loss = 1.5393e-03, PNorm = 174.2235, GNorm = 0.2421, lr_0 = 1.5157e-04
Loss = 1.1823e-03, PNorm = 174.2258, GNorm = 0.2016, lr_0 = 1.5147e-04
Loss = 1.7072e-03, PNorm = 174.2283, GNorm = 0.1596, lr_0 = 1.5137e-04
Loss = 3.0442e-03, PNorm = 174.2300, GNorm = 0.0543, lr_0 = 1.5126e-04
Loss = 2.5873e-03, PNorm = 174.2316, GNorm = 0.1354, lr_0 = 1.5116e-04
Loss = 2.1348e-03, PNorm = 174.2318, GNorm = 0.0840, lr_0 = 1.5106e-04
Loss = 1.5785e-03, PNorm = 174.2331, GNorm = 0.1424, lr_0 = 1.5095e-04
Loss = 1.7615e-03, PNorm = 174.2356, GNorm = 0.1077, lr_0 = 1.5085e-04
Validation mae = 0.278442
Epoch 25
Loss = 1.5941e-03, PNorm = 174.2382, GNorm = 0.0575, lr_0 = 1.5075e-04
Loss = 2.0422e-03, PNorm = 174.2405, GNorm = 0.0821, lr_0 = 1.5064e-04
Loss = 1.0017e-03, PNorm = 174.2429, GNorm = 0.2014, lr_0 = 1.5054e-04
Loss = 1.2060e-03, PNorm = 174.2465, GNorm = 0.0330, lr_0 = 1.5044e-04
Loss = 1.2220e-03, PNorm = 174.2489, GNorm = 0.0644, lr_0 = 1.5033e-04
Loss = 4.7736e-03, PNorm = 174.2505, GNorm = 0.3633, lr_0 = 1.5023e-04
Loss = 1.2514e-03, PNorm = 174.2525, GNorm = 0.0947, lr_0 = 1.5013e-04
Loss = 1.1905e-03, PNorm = 174.2545, GNorm = 0.2596, lr_0 = 1.5002e-04
Loss = 1.6541e-03, PNorm = 174.2574, GNorm = 0.1283, lr_0 = 1.4992e-04
Loss = 1.2455e-03, PNorm = 174.2586, GNorm = 0.1158, lr_0 = 1.4982e-04
Loss = 9.2149e-04, PNorm = 174.2605, GNorm = 0.1776, lr_0 = 1.4972e-04
Loss = 1.6701e-03, PNorm = 174.2625, GNorm = 0.0827, lr_0 = 1.4961e-04
Loss = 2.3121e-03, PNorm = 174.2646, GNorm = 0.0672, lr_0 = 1.4951e-04
Loss = 1.7819e-03, PNorm = 174.2669, GNorm = 0.1855, lr_0 = 1.4941e-04
Loss = 2.2605e-03, PNorm = 174.2686, GNorm = 0.0863, lr_0 = 1.4931e-04
Loss = 2.1685e-03, PNorm = 174.2707, GNorm = 0.0840, lr_0 = 1.4920e-04
Loss = 1.1118e-03, PNorm = 174.2737, GNorm = 0.1383, lr_0 = 1.4910e-04
Loss = 1.5451e-03, PNorm = 174.2754, GNorm = 0.0854, lr_0 = 1.4900e-04
Loss = 2.1840e-03, PNorm = 174.2794, GNorm = 0.1874, lr_0 = 1.4890e-04
Loss = 1.1619e-03, PNorm = 174.2806, GNorm = 0.1204, lr_0 = 1.4880e-04
Loss = 1.2327e-03, PNorm = 174.2820, GNorm = 0.0322, lr_0 = 1.4869e-04
Loss = 1.4909e-03, PNorm = 174.2853, GNorm = 0.1451, lr_0 = 1.4859e-04
Loss = 1.3404e-03, PNorm = 174.2872, GNorm = 0.0399, lr_0 = 1.4849e-04
Loss = 1.8033e-03, PNorm = 174.2897, GNorm = 0.1021, lr_0 = 1.4839e-04
Loss = 1.5373e-03, PNorm = 174.2924, GNorm = 0.0496, lr_0 = 1.4829e-04
Loss = 1.0562e-03, PNorm = 174.2940, GNorm = 0.0714, lr_0 = 1.4818e-04
Loss = 1.0147e-03, PNorm = 174.2961, GNorm = 0.0879, lr_0 = 1.4808e-04
Loss = 2.2581e-03, PNorm = 174.2989, GNorm = 0.1734, lr_0 = 1.4798e-04
Loss = 1.6148e-03, PNorm = 174.2990, GNorm = 0.1090, lr_0 = 1.4788e-04
Loss = 9.0723e-04, PNorm = 174.3019, GNorm = 0.0689, lr_0 = 1.4778e-04
Loss = 1.0355e-03, PNorm = 174.3039, GNorm = 0.0842, lr_0 = 1.4768e-04
Loss = 9.8028e-04, PNorm = 174.3052, GNorm = 0.0805, lr_0 = 1.4758e-04
Loss = 2.8313e-03, PNorm = 174.3060, GNorm = 0.6388, lr_0 = 1.4748e-04
Loss = 1.4229e-03, PNorm = 174.3068, GNorm = 0.1566, lr_0 = 1.4737e-04
Loss = 1.7242e-03, PNorm = 174.3086, GNorm = 0.1407, lr_0 = 1.4727e-04
Loss = 1.2058e-03, PNorm = 174.3106, GNorm = 0.1695, lr_0 = 1.4717e-04
Loss = 1.0657e-03, PNorm = 174.3131, GNorm = 0.1353, lr_0 = 1.4707e-04
Loss = 1.3068e-03, PNorm = 174.3163, GNorm = 0.1693, lr_0 = 1.4697e-04
Loss = 9.8934e-04, PNorm = 174.3186, GNorm = 0.0517, lr_0 = 1.4687e-04
Loss = 1.1560e-03, PNorm = 174.3205, GNorm = 0.0361, lr_0 = 1.4677e-04
Loss = 1.8953e-03, PNorm = 174.3228, GNorm = 0.0965, lr_0 = 1.4667e-04
Loss = 9.2630e-04, PNorm = 174.3242, GNorm = 0.0750, lr_0 = 1.4657e-04
Loss = 9.1623e-04, PNorm = 174.3263, GNorm = 0.1433, lr_0 = 1.4647e-04
Loss = 1.6633e-03, PNorm = 174.3285, GNorm = 0.1356, lr_0 = 1.4637e-04
Loss = 1.6943e-03, PNorm = 174.3308, GNorm = 0.0565, lr_0 = 1.4627e-04
Loss = 2.1283e-03, PNorm = 174.3321, GNorm = 0.0259, lr_0 = 1.4617e-04
Loss = 1.3677e-03, PNorm = 174.3343, GNorm = 0.0258, lr_0 = 1.4607e-04
Loss = 1.0728e-03, PNorm = 174.3361, GNorm = 0.1461, lr_0 = 1.4597e-04
Loss = 2.9344e-03, PNorm = 174.3383, GNorm = 0.0534, lr_0 = 1.4587e-04
Loss = 3.6142e-03, PNorm = 174.3408, GNorm = 0.1510, lr_0 = 1.4577e-04
Loss = 1.1552e-03, PNorm = 174.3414, GNorm = 0.1378, lr_0 = 1.4567e-04
Loss = 1.0415e-03, PNorm = 174.3436, GNorm = 0.0647, lr_0 = 1.4557e-04
Loss = 1.5437e-03, PNorm = 174.3457, GNorm = 0.1689, lr_0 = 1.4547e-04
Loss = 1.6364e-03, PNorm = 174.3503, GNorm = 0.4475, lr_0 = 1.4537e-04
Loss = 2.0568e-03, PNorm = 174.3536, GNorm = 0.0816, lr_0 = 1.4527e-04
Loss = 9.5028e-04, PNorm = 174.3561, GNorm = 0.0933, lr_0 = 1.4517e-04
Loss = 1.2504e-03, PNorm = 174.3587, GNorm = 0.1674, lr_0 = 1.4507e-04
Loss = 4.3329e-03, PNorm = 174.3612, GNorm = 0.5555, lr_0 = 1.4497e-04
Loss = 1.7389e-03, PNorm = 174.3632, GNorm = 0.2415, lr_0 = 1.4487e-04
Loss = 4.5984e-03, PNorm = 174.3658, GNorm = 0.1453, lr_0 = 1.4477e-04
Loss = 1.4824e-03, PNorm = 174.3695, GNorm = 0.2206, lr_0 = 1.4467e-04
Loss = 1.3218e-03, PNorm = 174.3733, GNorm = 0.0987, lr_0 = 1.4457e-04
Loss = 1.0721e-03, PNorm = 174.3756, GNorm = 0.0446, lr_0 = 1.4447e-04
Loss = 9.0381e-04, PNorm = 174.3783, GNorm = 0.1341, lr_0 = 1.4438e-04
Loss = 1.6324e-03, PNorm = 174.3794, GNorm = 0.1511, lr_0 = 1.4428e-04
Loss = 8.6549e-04, PNorm = 174.3818, GNorm = 0.1160, lr_0 = 1.4418e-04
Loss = 3.9413e-03, PNorm = 174.3820, GNorm = 0.0509, lr_0 = 1.4408e-04
Loss = 2.5043e-03, PNorm = 174.3822, GNorm = 0.0756, lr_0 = 1.4398e-04
Loss = 1.0727e-03, PNorm = 174.3844, GNorm = 0.0341, lr_0 = 1.4388e-04
Loss = 1.4051e-03, PNorm = 174.3870, GNorm = 0.1559, lr_0 = 1.4378e-04
Loss = 1.8251e-03, PNorm = 174.3904, GNorm = 0.3201, lr_0 = 1.4368e-04
Loss = 2.1201e-03, PNorm = 174.3918, GNorm = 0.0778, lr_0 = 1.4359e-04
Loss = 2.4323e-03, PNorm = 174.3955, GNorm = 0.1537, lr_0 = 1.4349e-04
Loss = 1.0770e-03, PNorm = 174.4001, GNorm = 0.0842, lr_0 = 1.4339e-04
Loss = 2.4756e-03, PNorm = 174.4045, GNorm = 0.0798, lr_0 = 1.4329e-04
Loss = 2.6807e-03, PNorm = 174.4065, GNorm = 0.1101, lr_0 = 1.4319e-04
Loss = 2.3452e-03, PNorm = 174.4071, GNorm = 0.0869, lr_0 = 1.4310e-04
Loss = 3.0848e-03, PNorm = 174.4081, GNorm = 0.1832, lr_0 = 1.4300e-04
Loss = 3.6196e-03, PNorm = 174.4117, GNorm = 0.1215, lr_0 = 1.4290e-04
Loss = 1.0287e-03, PNorm = 174.4150, GNorm = 0.0837, lr_0 = 1.4280e-04
Loss = 8.5364e-04, PNorm = 174.4176, GNorm = 0.0585, lr_0 = 1.4270e-04
Loss = 9.3710e-04, PNorm = 174.4199, GNorm = 0.0454, lr_0 = 1.4261e-04
Loss = 2.0564e-03, PNorm = 174.4225, GNorm = 0.0499, lr_0 = 1.4251e-04
Loss = 1.1433e-03, PNorm = 174.4245, GNorm = 0.0619, lr_0 = 1.4241e-04
Loss = 1.4009e-03, PNorm = 174.4284, GNorm = 0.0946, lr_0 = 1.4231e-04
Loss = 1.9024e-03, PNorm = 174.4300, GNorm = 0.0999, lr_0 = 1.4222e-04
Loss = 3.8509e-03, PNorm = 174.4334, GNorm = 0.0615, lr_0 = 1.4212e-04
Loss = 2.2905e-03, PNorm = 174.4362, GNorm = 0.1811, lr_0 = 1.4202e-04
Loss = 1.1634e-03, PNorm = 174.4399, GNorm = 0.0654, lr_0 = 1.4192e-04
Loss = 2.7695e-03, PNorm = 174.4413, GNorm = 0.2019, lr_0 = 1.4183e-04
Loss = 2.1588e-03, PNorm = 174.4437, GNorm = 0.1567, lr_0 = 1.4173e-04
Loss = 9.3890e-04, PNorm = 174.4443, GNorm = 0.2671, lr_0 = 1.4163e-04
Loss = 9.5843e-04, PNorm = 174.4465, GNorm = 0.0980, lr_0 = 1.4153e-04
Loss = 1.3278e-03, PNorm = 174.4484, GNorm = 0.2261, lr_0 = 1.4144e-04
Loss = 2.1998e-03, PNorm = 174.4514, GNorm = 0.0637, lr_0 = 1.4134e-04
Loss = 8.7900e-04, PNorm = 174.4548, GNorm = 0.0509, lr_0 = 1.4124e-04
Loss = 1.4929e-03, PNorm = 174.4585, GNorm = 0.0499, lr_0 = 1.4115e-04
Loss = 2.3227e-03, PNorm = 174.4616, GNorm = 0.0501, lr_0 = 1.4105e-04
Loss = 1.2232e-03, PNorm = 174.4636, GNorm = 0.3285, lr_0 = 1.4095e-04
Loss = 1.0649e-03, PNorm = 174.4639, GNorm = 0.2116, lr_0 = 1.4086e-04
Loss = 3.5042e-03, PNorm = 174.4643, GNorm = 0.2244, lr_0 = 1.4076e-04
Loss = 3.0293e-03, PNorm = 174.4664, GNorm = 0.0631, lr_0 = 1.4066e-04
Loss = 1.0179e-03, PNorm = 174.4681, GNorm = 0.0470, lr_0 = 1.4057e-04
Loss = 1.5013e-03, PNorm = 174.4694, GNorm = 0.0641, lr_0 = 1.4047e-04
Loss = 1.3371e-03, PNorm = 174.4713, GNorm = 0.0607, lr_0 = 1.4038e-04
Loss = 2.0235e-03, PNorm = 174.4750, GNorm = 0.1087, lr_0 = 1.4028e-04
Loss = 1.0307e-03, PNorm = 174.4774, GNorm = 0.0969, lr_0 = 1.4018e-04
Loss = 3.4974e-03, PNorm = 174.4769, GNorm = 0.1332, lr_0 = 1.4009e-04
Loss = 1.3464e-03, PNorm = 174.4791, GNorm = 0.1311, lr_0 = 1.3999e-04
Loss = 1.9822e-03, PNorm = 174.4816, GNorm = 0.1221, lr_0 = 1.3990e-04
Loss = 9.9818e-04, PNorm = 174.4853, GNorm = 0.0527, lr_0 = 1.3980e-04
Loss = 1.9235e-03, PNorm = 174.4890, GNorm = 0.7426, lr_0 = 1.3970e-04
Loss = 9.7392e-04, PNorm = 174.4938, GNorm = 0.2230, lr_0 = 1.3961e-04
Loss = 1.9782e-03, PNorm = 174.4971, GNorm = 0.1222, lr_0 = 1.3951e-04
Loss = 1.8574e-03, PNorm = 174.5000, GNorm = 0.0672, lr_0 = 1.3942e-04
Loss = 1.5756e-03, PNorm = 174.5019, GNorm = 0.0711, lr_0 = 1.3932e-04
Loss = 1.3216e-03, PNorm = 174.5049, GNorm = 0.0382, lr_0 = 1.3923e-04
Loss = 1.1444e-03, PNorm = 174.5078, GNorm = 0.0381, lr_0 = 1.3913e-04
Loss = 2.2184e-03, PNorm = 174.5107, GNorm = 0.1464, lr_0 = 1.3904e-04
Loss = 1.1648e-03, PNorm = 174.5130, GNorm = 0.0379, lr_0 = 1.3894e-04
Validation mae = 0.278289
Epoch 26
Loss = 1.4959e-03, PNorm = 174.5141, GNorm = 0.1224, lr_0 = 1.3884e-04
Loss = 1.7594e-03, PNorm = 174.5132, GNorm = 0.0742, lr_0 = 1.3875e-04
Loss = 9.1771e-04, PNorm = 174.5149, GNorm = 0.2283, lr_0 = 1.3865e-04
Loss = 8.7330e-04, PNorm = 174.5174, GNorm = 0.1484, lr_0 = 1.3856e-04
Loss = 1.8232e-03, PNorm = 174.5195, GNorm = 0.3104, lr_0 = 1.3846e-04
Loss = 2.0789e-03, PNorm = 174.5221, GNorm = 0.1408, lr_0 = 1.3837e-04
Loss = 1.2362e-03, PNorm = 174.5250, GNorm = 0.2352, lr_0 = 1.3828e-04
Loss = 1.4537e-03, PNorm = 174.5253, GNorm = 0.0882, lr_0 = 1.3818e-04
Loss = 1.8205e-03, PNorm = 174.5263, GNorm = 0.1603, lr_0 = 1.3809e-04
Loss = 1.0153e-03, PNorm = 174.5265, GNorm = 0.2199, lr_0 = 1.3799e-04
Loss = 2.0406e-03, PNorm = 174.5269, GNorm = 0.2233, lr_0 = 1.3790e-04
Loss = 8.4854e-04, PNorm = 174.5283, GNorm = 0.0604, lr_0 = 1.3780e-04
Loss = 1.0240e-03, PNorm = 174.5304, GNorm = 0.1176, lr_0 = 1.3771e-04
Loss = 9.8620e-04, PNorm = 174.5322, GNorm = 0.0642, lr_0 = 1.3761e-04
Loss = 8.6540e-04, PNorm = 174.5333, GNorm = 0.0730, lr_0 = 1.3752e-04
Loss = 1.0979e-03, PNorm = 174.5343, GNorm = 0.0628, lr_0 = 1.3742e-04
Loss = 1.0074e-03, PNorm = 174.5354, GNorm = 0.1137, lr_0 = 1.3733e-04
Loss = 8.7732e-04, PNorm = 174.5373, GNorm = 0.0516, lr_0 = 1.3724e-04
Loss = 2.1635e-03, PNorm = 174.5407, GNorm = 0.0842, lr_0 = 1.3714e-04
Loss = 1.5756e-03, PNorm = 174.5428, GNorm = 0.1329, lr_0 = 1.3705e-04
Loss = 1.2454e-03, PNorm = 174.5464, GNorm = 0.1695, lr_0 = 1.3695e-04
Loss = 1.9953e-03, PNorm = 174.5490, GNorm = 0.0799, lr_0 = 1.3686e-04
Loss = 1.2652e-03, PNorm = 174.5508, GNorm = 0.1605, lr_0 = 1.3677e-04
Loss = 1.0771e-03, PNorm = 174.5527, GNorm = 0.1749, lr_0 = 1.3667e-04
Loss = 9.0504e-04, PNorm = 174.5561, GNorm = 0.2132, lr_0 = 1.3658e-04
Loss = 1.3576e-03, PNorm = 174.5585, GNorm = 0.0797, lr_0 = 1.3649e-04
Loss = 1.0032e-03, PNorm = 174.5602, GNorm = 0.1253, lr_0 = 1.3639e-04
Loss = 1.3781e-03, PNorm = 174.5605, GNorm = 0.0754, lr_0 = 1.3630e-04
Loss = 1.2146e-03, PNorm = 174.5632, GNorm = 0.1263, lr_0 = 1.3621e-04
Loss = 8.7968e-04, PNorm = 174.5654, GNorm = 0.1534, lr_0 = 1.3611e-04
Loss = 1.4026e-03, PNorm = 174.5677, GNorm = 0.1019, lr_0 = 1.3602e-04
Loss = 1.3114e-03, PNorm = 174.5698, GNorm = 0.0814, lr_0 = 1.3593e-04
Loss = 2.7981e-03, PNorm = 174.5718, GNorm = 0.2200, lr_0 = 1.3583e-04
Loss = 1.7423e-03, PNorm = 174.5731, GNorm = 0.0981, lr_0 = 1.3574e-04
Loss = 8.4244e-04, PNorm = 174.5733, GNorm = 0.1233, lr_0 = 1.3565e-04
Loss = 1.0565e-03, PNorm = 174.5739, GNorm = 0.1457, lr_0 = 1.3555e-04
Loss = 3.0508e-03, PNorm = 174.5745, GNorm = 0.0537, lr_0 = 1.3546e-04
Loss = 1.6219e-03, PNorm = 174.5737, GNorm = 0.1742, lr_0 = 1.3537e-04
Loss = 2.1297e-03, PNorm = 174.5766, GNorm = 0.1867, lr_0 = 1.3528e-04
Loss = 1.0307e-03, PNorm = 174.5781, GNorm = 0.1010, lr_0 = 1.3518e-04
Loss = 2.5446e-03, PNorm = 174.5812, GNorm = 0.2456, lr_0 = 1.3509e-04
Loss = 1.2308e-03, PNorm = 174.5829, GNorm = 0.0381, lr_0 = 1.3500e-04
Loss = 9.8695e-04, PNorm = 174.5866, GNorm = 0.0788, lr_0 = 1.3491e-04
Loss = 1.5603e-03, PNorm = 174.5896, GNorm = 0.1033, lr_0 = 1.3481e-04
Loss = 9.9889e-04, PNorm = 174.5911, GNorm = 0.0567, lr_0 = 1.3472e-04
Loss = 1.0135e-03, PNorm = 174.5917, GNorm = 0.1070, lr_0 = 1.3463e-04
Loss = 1.8562e-03, PNorm = 174.5911, GNorm = 0.0356, lr_0 = 1.3454e-04
Loss = 2.2375e-03, PNorm = 174.5915, GNorm = 0.0518, lr_0 = 1.3444e-04
Loss = 2.1672e-03, PNorm = 174.5931, GNorm = 0.0426, lr_0 = 1.3435e-04
Loss = 8.8282e-04, PNorm = 174.5956, GNorm = 0.0880, lr_0 = 1.3426e-04
Loss = 3.6743e-03, PNorm = 174.5981, GNorm = 0.4170, lr_0 = 1.3417e-04
Loss = 1.5815e-03, PNorm = 174.5984, GNorm = 0.0956, lr_0 = 1.3408e-04
Loss = 9.4077e-04, PNorm = 174.6007, GNorm = 0.1899, lr_0 = 1.3398e-04
Loss = 1.9839e-03, PNorm = 174.6018, GNorm = 0.2284, lr_0 = 1.3389e-04
Loss = 1.6176e-03, PNorm = 174.6059, GNorm = 0.0896, lr_0 = 1.3380e-04
Loss = 1.1272e-03, PNorm = 174.6079, GNorm = 0.1792, lr_0 = 1.3371e-04
Loss = 1.2580e-03, PNorm = 174.6098, GNorm = 0.0723, lr_0 = 1.3362e-04
Loss = 1.4569e-03, PNorm = 174.6104, GNorm = 0.0915, lr_0 = 1.3353e-04
Loss = 1.1928e-03, PNorm = 174.6122, GNorm = 0.0810, lr_0 = 1.3343e-04
Loss = 1.3981e-03, PNorm = 174.6149, GNorm = 0.0822, lr_0 = 1.3334e-04
Loss = 1.8419e-03, PNorm = 174.6179, GNorm = 0.0502, lr_0 = 1.3325e-04
Loss = 1.5015e-03, PNorm = 174.6202, GNorm = 0.0441, lr_0 = 1.3316e-04
Loss = 2.1148e-03, PNorm = 174.6210, GNorm = 0.1292, lr_0 = 1.3307e-04
Loss = 9.4466e-04, PNorm = 174.6228, GNorm = 0.1960, lr_0 = 1.3298e-04
Loss = 3.0720e-03, PNorm = 174.6249, GNorm = 0.0684, lr_0 = 1.3289e-04
Loss = 9.5803e-04, PNorm = 174.6257, GNorm = 0.0642, lr_0 = 1.3280e-04
Loss = 1.0765e-03, PNorm = 174.6269, GNorm = 0.0781, lr_0 = 1.3270e-04
Loss = 1.5210e-03, PNorm = 174.6290, GNorm = 0.1613, lr_0 = 1.3261e-04
Loss = 1.8016e-03, PNorm = 174.6313, GNorm = 0.2114, lr_0 = 1.3252e-04
Loss = 1.0392e-03, PNorm = 174.6341, GNorm = 0.2481, lr_0 = 1.3243e-04
Loss = 2.1318e-03, PNorm = 174.6336, GNorm = 0.1915, lr_0 = 1.3234e-04
Loss = 1.6578e-03, PNorm = 174.6359, GNorm = 0.1249, lr_0 = 1.3225e-04
Loss = 1.2499e-03, PNorm = 174.6372, GNorm = 0.0638, lr_0 = 1.3216e-04
Loss = 1.7492e-03, PNorm = 174.6410, GNorm = 0.2659, lr_0 = 1.3207e-04
Loss = 1.3667e-03, PNorm = 174.6433, GNorm = 0.1276, lr_0 = 1.3198e-04
Loss = 9.5075e-04, PNorm = 174.6452, GNorm = 0.1323, lr_0 = 1.3189e-04
Loss = 1.7480e-03, PNorm = 174.6472, GNorm = 0.2522, lr_0 = 1.3180e-04
Loss = 1.8413e-03, PNorm = 174.6482, GNorm = 0.1201, lr_0 = 1.3171e-04
Loss = 1.6352e-03, PNorm = 174.6507, GNorm = 0.0628, lr_0 = 1.3162e-04
Loss = 7.8216e-04, PNorm = 174.6538, GNorm = 0.1316, lr_0 = 1.3153e-04
Loss = 8.1374e-04, PNorm = 174.6556, GNorm = 0.2025, lr_0 = 1.3144e-04
Loss = 9.0758e-04, PNorm = 174.6563, GNorm = 0.1316, lr_0 = 1.3135e-04
Loss = 1.8961e-03, PNorm = 174.6563, GNorm = 0.2026, lr_0 = 1.3126e-04
Loss = 7.8682e-04, PNorm = 174.6569, GNorm = 0.1242, lr_0 = 1.3117e-04
Loss = 1.5798e-03, PNorm = 174.6570, GNorm = 0.0778, lr_0 = 1.3108e-04
Loss = 1.9169e-03, PNorm = 174.6583, GNorm = 0.0590, lr_0 = 1.3099e-04
Loss = 2.2677e-03, PNorm = 174.6602, GNorm = 0.0995, lr_0 = 1.3090e-04
Loss = 2.3984e-03, PNorm = 174.6616, GNorm = 0.2283, lr_0 = 1.3081e-04
Loss = 1.6621e-03, PNorm = 174.6621, GNorm = 0.1690, lr_0 = 1.3072e-04
Loss = 4.5826e-03, PNorm = 174.6660, GNorm = 0.4258, lr_0 = 1.3063e-04
Loss = 1.8723e-03, PNorm = 174.6707, GNorm = 0.1623, lr_0 = 1.3054e-04
Loss = 2.2982e-03, PNorm = 174.6738, GNorm = 0.1363, lr_0 = 1.3045e-04
Loss = 2.5611e-03, PNorm = 174.6770, GNorm = 0.1027, lr_0 = 1.3036e-04
Loss = 1.1235e-03, PNorm = 174.6789, GNorm = 0.0750, lr_0 = 1.3027e-04
Loss = 1.9704e-03, PNorm = 174.6815, GNorm = 0.0947, lr_0 = 1.3018e-04
Loss = 1.9646e-03, PNorm = 174.6841, GNorm = 0.1697, lr_0 = 1.3009e-04
Loss = 1.4716e-03, PNorm = 174.6874, GNorm = 0.2227, lr_0 = 1.3000e-04
Loss = 2.2245e-03, PNorm = 174.6887, GNorm = 0.1499, lr_0 = 1.2992e-04
Loss = 9.2316e-04, PNorm = 174.6905, GNorm = 0.1602, lr_0 = 1.2983e-04
Loss = 2.2467e-03, PNorm = 174.6911, GNorm = 0.1295, lr_0 = 1.2974e-04
Loss = 3.1675e-03, PNorm = 174.6952, GNorm = 0.1015, lr_0 = 1.2965e-04
Loss = 1.1114e-03, PNorm = 174.6988, GNorm = 0.0731, lr_0 = 1.2956e-04
Loss = 1.1264e-03, PNorm = 174.7024, GNorm = 0.0560, lr_0 = 1.2947e-04
Loss = 1.5017e-03, PNorm = 174.7041, GNorm = 0.0457, lr_0 = 1.2938e-04
Loss = 1.6305e-03, PNorm = 174.7066, GNorm = 0.0451, lr_0 = 1.2929e-04
Loss = 7.8666e-04, PNorm = 174.7081, GNorm = 0.0272, lr_0 = 1.2921e-04
Loss = 6.7022e-04, PNorm = 174.7099, GNorm = 0.0494, lr_0 = 1.2912e-04
Loss = 7.6706e-04, PNorm = 174.7121, GNorm = 0.1139, lr_0 = 1.2903e-04
Loss = 2.4440e-03, PNorm = 174.7149, GNorm = 0.8798, lr_0 = 1.2894e-04
Loss = 1.6027e-03, PNorm = 174.7154, GNorm = 0.0261, lr_0 = 1.2885e-04
Loss = 8.7298e-04, PNorm = 174.7160, GNorm = 0.1093, lr_0 = 1.2876e-04
Loss = 9.3811e-04, PNorm = 174.7159, GNorm = 0.0462, lr_0 = 1.2867e-04
Loss = 8.8001e-04, PNorm = 174.7183, GNorm = 0.2356, lr_0 = 1.2859e-04
Loss = 1.3882e-03, PNorm = 174.7188, GNorm = 0.0716, lr_0 = 1.2850e-04
Loss = 3.2361e-03, PNorm = 174.7202, GNorm = 0.2355, lr_0 = 1.2841e-04
Loss = 9.5163e-04, PNorm = 174.7233, GNorm = 0.0752, lr_0 = 1.2832e-04
Loss = 2.3201e-03, PNorm = 174.7241, GNorm = 0.3579, lr_0 = 1.2823e-04
Loss = 8.8887e-04, PNorm = 174.7252, GNorm = 0.0553, lr_0 = 1.2815e-04
Loss = 2.0507e-03, PNorm = 174.7276, GNorm = 0.0710, lr_0 = 1.2806e-04
Loss = 1.4954e-03, PNorm = 174.7302, GNorm = 0.0960, lr_0 = 1.2797e-04
Validation mae = 0.278507
Epoch 27
Loss = 1.1362e-03, PNorm = 174.7322, GNorm = 0.1845, lr_0 = 1.2788e-04
Loss = 7.5176e-04, PNorm = 174.7322, GNorm = 0.0757, lr_0 = 1.2780e-04
Loss = 1.0632e-03, PNorm = 174.7319, GNorm = 0.1599, lr_0 = 1.2771e-04
Loss = 6.6842e-04, PNorm = 174.7324, GNorm = 0.0444, lr_0 = 1.2762e-04
Loss = 2.2495e-03, PNorm = 174.7340, GNorm = 0.0962, lr_0 = 1.2753e-04
Loss = 8.1427e-04, PNorm = 174.7352, GNorm = 0.1342, lr_0 = 1.2745e-04
Loss = 8.3009e-04, PNorm = 174.7389, GNorm = 0.0966, lr_0 = 1.2736e-04
Loss = 9.7673e-04, PNorm = 174.7400, GNorm = 0.1128, lr_0 = 1.2727e-04
Loss = 7.3297e-04, PNorm = 174.7410, GNorm = 0.1438, lr_0 = 1.2718e-04
Loss = 2.7346e-03, PNorm = 174.7430, GNorm = 0.0552, lr_0 = 1.2710e-04
Loss = 8.9601e-04, PNorm = 174.7438, GNorm = 0.1132, lr_0 = 1.2701e-04
Loss = 1.4063e-03, PNorm = 174.7457, GNorm = 0.0773, lr_0 = 1.2692e-04
Loss = 7.9287e-04, PNorm = 174.7471, GNorm = 0.1178, lr_0 = 1.2684e-04
Loss = 1.2925e-03, PNorm = 174.7475, GNorm = 0.1020, lr_0 = 1.2675e-04
Loss = 2.1560e-03, PNorm = 174.7490, GNorm = 0.0572, lr_0 = 1.2666e-04
Loss = 3.0319e-03, PNorm = 174.7484, GNorm = 0.0823, lr_0 = 1.2658e-04
Loss = 2.6683e-03, PNorm = 174.7499, GNorm = 0.1039, lr_0 = 1.2649e-04
Loss = 1.0841e-03, PNorm = 174.7515, GNorm = 0.1775, lr_0 = 1.2640e-04
Loss = 1.2406e-03, PNorm = 174.7538, GNorm = 0.1171, lr_0 = 1.2632e-04
Loss = 7.1721e-04, PNorm = 174.7566, GNorm = 0.1115, lr_0 = 1.2623e-04
Loss = 7.9591e-04, PNorm = 174.7587, GNorm = 0.0611, lr_0 = 1.2614e-04
Loss = 9.0259e-04, PNorm = 174.7600, GNorm = 0.0499, lr_0 = 1.2606e-04
Loss = 1.3548e-03, PNorm = 174.7601, GNorm = 0.0527, lr_0 = 1.2597e-04
Loss = 1.2329e-03, PNorm = 174.7599, GNorm = 0.1331, lr_0 = 1.2588e-04
Loss = 1.2872e-03, PNorm = 174.7612, GNorm = 0.0847, lr_0 = 1.2580e-04
Loss = 7.6735e-04, PNorm = 174.7629, GNorm = 0.1154, lr_0 = 1.2571e-04
Loss = 1.4868e-03, PNorm = 174.7650, GNorm = 1.0090, lr_0 = 1.2563e-04
Loss = 3.1603e-03, PNorm = 174.7655, GNorm = 0.2603, lr_0 = 1.2554e-04
Loss = 1.4547e-03, PNorm = 174.7664, GNorm = 0.0295, lr_0 = 1.2545e-04
Loss = 9.6905e-04, PNorm = 174.7687, GNorm = 0.1622, lr_0 = 1.2537e-04
Loss = 1.2614e-03, PNorm = 174.7714, GNorm = 0.1510, lr_0 = 1.2528e-04
Loss = 7.2904e-04, PNorm = 174.7741, GNorm = 0.0816, lr_0 = 1.2520e-04
Loss = 7.0545e-04, PNorm = 174.7761, GNorm = 0.0642, lr_0 = 1.2511e-04
Loss = 8.3084e-04, PNorm = 174.7778, GNorm = 0.0428, lr_0 = 1.2502e-04
Loss = 1.6652e-03, PNorm = 174.7811, GNorm = 0.1540, lr_0 = 1.2494e-04
Loss = 6.6467e-04, PNorm = 174.7832, GNorm = 0.2631, lr_0 = 1.2485e-04
Loss = 6.5745e-04, PNorm = 174.7844, GNorm = 0.1114, lr_0 = 1.2477e-04
Loss = 1.3349e-03, PNorm = 174.7850, GNorm = 0.1877, lr_0 = 1.2468e-04
Loss = 1.5707e-03, PNorm = 174.7850, GNorm = 0.0355, lr_0 = 1.2460e-04
Loss = 8.3465e-04, PNorm = 174.7863, GNorm = 0.0726, lr_0 = 1.2451e-04
Loss = 9.8454e-04, PNorm = 174.7886, GNorm = 0.0530, lr_0 = 1.2443e-04
Loss = 9.9560e-04, PNorm = 174.7917, GNorm = 0.3027, lr_0 = 1.2434e-04
Loss = 1.7278e-03, PNorm = 174.7944, GNorm = 0.0769, lr_0 = 1.2426e-04
Loss = 2.1193e-03, PNorm = 174.7963, GNorm = 0.1406, lr_0 = 1.2417e-04
Loss = 1.0262e-03, PNorm = 174.7974, GNorm = 0.0859, lr_0 = 1.2409e-04
Loss = 1.0037e-03, PNorm = 174.7986, GNorm = 0.0793, lr_0 = 1.2400e-04
Loss = 2.8669e-03, PNorm = 174.8002, GNorm = 0.2632, lr_0 = 1.2392e-04
Loss = 1.0369e-03, PNorm = 174.8031, GNorm = 0.0781, lr_0 = 1.2383e-04
Loss = 1.2096e-03, PNorm = 174.8049, GNorm = 0.1184, lr_0 = 1.2375e-04
Loss = 9.8662e-04, PNorm = 174.8067, GNorm = 0.0779, lr_0 = 1.2366e-04
Loss = 7.1561e-04, PNorm = 174.8069, GNorm = 0.1366, lr_0 = 1.2358e-04
Loss = 2.4849e-03, PNorm = 174.8083, GNorm = 0.0661, lr_0 = 1.2349e-04
Loss = 8.9202e-04, PNorm = 174.8089, GNorm = 0.0223, lr_0 = 1.2341e-04
Loss = 2.0539e-03, PNorm = 174.8112, GNorm = 0.0728, lr_0 = 1.2332e-04
Loss = 7.9140e-04, PNorm = 174.8150, GNorm = 0.0645, lr_0 = 1.2324e-04
Loss = 1.1477e-03, PNorm = 174.8171, GNorm = 0.1201, lr_0 = 1.2315e-04
Loss = 1.1566e-03, PNorm = 174.8185, GNorm = 0.1303, lr_0 = 1.2307e-04
Loss = 1.0562e-03, PNorm = 174.8185, GNorm = 0.0807, lr_0 = 1.2298e-04
Loss = 1.1704e-03, PNorm = 174.8204, GNorm = 0.1994, lr_0 = 1.2290e-04
Loss = 8.0088e-04, PNorm = 174.8219, GNorm = 0.0662, lr_0 = 1.2282e-04
Loss = 3.4580e-03, PNorm = 174.8249, GNorm = 0.1141, lr_0 = 1.2273e-04
Loss = 1.6646e-03, PNorm = 174.8267, GNorm = 0.0584, lr_0 = 1.2265e-04
Loss = 9.2645e-04, PNorm = 174.8279, GNorm = 0.0638, lr_0 = 1.2256e-04
Loss = 2.0829e-03, PNorm = 174.8297, GNorm = 0.0353, lr_0 = 1.2248e-04
Loss = 9.4730e-04, PNorm = 174.8299, GNorm = 0.0732, lr_0 = 1.2240e-04
Loss = 2.3221e-03, PNorm = 174.8316, GNorm = 0.1761, lr_0 = 1.2231e-04
Loss = 1.5258e-03, PNorm = 174.8325, GNorm = 0.1981, lr_0 = 1.2223e-04
Loss = 1.6621e-03, PNorm = 174.8356, GNorm = 0.0256, lr_0 = 1.2214e-04
Loss = 2.0670e-03, PNorm = 174.8375, GNorm = 0.0626, lr_0 = 1.2206e-04
Loss = 1.6371e-03, PNorm = 174.8393, GNorm = 0.2463, lr_0 = 1.2198e-04
Loss = 6.0448e-04, PNorm = 174.8408, GNorm = 0.0937, lr_0 = 1.2189e-04
Loss = 1.2778e-03, PNorm = 174.8427, GNorm = 0.0594, lr_0 = 1.2181e-04
Loss = 2.1332e-03, PNorm = 174.8446, GNorm = 0.1091, lr_0 = 1.2173e-04
Loss = 9.5652e-04, PNorm = 174.8450, GNorm = 0.1050, lr_0 = 1.2164e-04
Loss = 9.4818e-04, PNorm = 174.8458, GNorm = 0.1617, lr_0 = 1.2156e-04
Loss = 1.2298e-03, PNorm = 174.8474, GNorm = 0.1242, lr_0 = 1.2148e-04
Loss = 1.8279e-03, PNorm = 174.8471, GNorm = 0.1540, lr_0 = 1.2139e-04
Loss = 9.3771e-04, PNorm = 174.8478, GNorm = 0.2231, lr_0 = 1.2131e-04
Loss = 8.7672e-04, PNorm = 174.8485, GNorm = 0.0465, lr_0 = 1.2123e-04
Loss = 6.5274e-04, PNorm = 174.8500, GNorm = 0.1190, lr_0 = 1.2114e-04
Loss = 2.1708e-03, PNorm = 174.8519, GNorm = 0.1565, lr_0 = 1.2106e-04
Loss = 7.5938e-04, PNorm = 174.8540, GNorm = 0.1151, lr_0 = 1.2098e-04
Loss = 9.3239e-04, PNorm = 174.8575, GNorm = 0.1187, lr_0 = 1.2090e-04
Loss = 1.0360e-03, PNorm = 174.8611, GNorm = 0.1181, lr_0 = 1.2081e-04
Loss = 6.5390e-04, PNorm = 174.8620, GNorm = 0.1307, lr_0 = 1.2073e-04
Loss = 2.0903e-03, PNorm = 174.8621, GNorm = 0.1304, lr_0 = 1.2065e-04
Loss = 1.3476e-03, PNorm = 174.8633, GNorm = 0.0742, lr_0 = 1.2056e-04
Loss = 1.0833e-03, PNorm = 174.8651, GNorm = 0.1092, lr_0 = 1.2048e-04
Loss = 6.9941e-04, PNorm = 174.8670, GNorm = 0.0636, lr_0 = 1.2040e-04
Loss = 1.3049e-03, PNorm = 174.8684, GNorm = 0.0897, lr_0 = 1.2032e-04
Loss = 1.4138e-03, PNorm = 174.8699, GNorm = 0.0680, lr_0 = 1.2023e-04
Loss = 2.8819e-03, PNorm = 174.8721, GNorm = 0.0678, lr_0 = 1.2015e-04
Loss = 2.6065e-03, PNorm = 174.8730, GNorm = 0.0771, lr_0 = 1.2007e-04
Loss = 9.6902e-04, PNorm = 174.8755, GNorm = 0.0622, lr_0 = 1.1999e-04
Loss = 1.5592e-03, PNorm = 174.8787, GNorm = 0.0875, lr_0 = 1.1991e-04
Loss = 2.7386e-03, PNorm = 174.8808, GNorm = 0.0995, lr_0 = 1.1982e-04
Loss = 1.2422e-03, PNorm = 174.8803, GNorm = 0.0910, lr_0 = 1.1974e-04
Loss = 1.7410e-03, PNorm = 174.8817, GNorm = 0.0333, lr_0 = 1.1966e-04
Loss = 6.9651e-04, PNorm = 174.8827, GNorm = 0.0629, lr_0 = 1.1958e-04
Loss = 1.3572e-03, PNorm = 174.8840, GNorm = 0.0482, lr_0 = 1.1950e-04
Loss = 2.0850e-03, PNorm = 174.8851, GNorm = 0.0516, lr_0 = 1.1941e-04
Loss = 4.0389e-03, PNorm = 174.8869, GNorm = 0.0783, lr_0 = 1.1933e-04
Loss = 2.0430e-03, PNorm = 174.8890, GNorm = 0.1397, lr_0 = 1.1925e-04
Loss = 2.0167e-03, PNorm = 174.8911, GNorm = 0.3171, lr_0 = 1.1917e-04
Loss = 6.3763e-04, PNorm = 174.8934, GNorm = 0.0801, lr_0 = 1.1909e-04
Loss = 7.2283e-04, PNorm = 174.8954, GNorm = 0.1300, lr_0 = 1.1901e-04
Loss = 4.2272e-03, PNorm = 174.8969, GNorm = 0.1204, lr_0 = 1.1892e-04
Loss = 6.6924e-04, PNorm = 174.8989, GNorm = 0.1080, lr_0 = 1.1884e-04
Loss = 1.2006e-03, PNorm = 174.9000, GNorm = 0.0279, lr_0 = 1.1876e-04
Loss = 9.5929e-04, PNorm = 174.9023, GNorm = 0.0542, lr_0 = 1.1868e-04
Loss = 2.5959e-03, PNorm = 174.9030, GNorm = 0.7685, lr_0 = 1.1860e-04
Loss = 1.6817e-03, PNorm = 174.9031, GNorm = 0.1837, lr_0 = 1.1852e-04
Loss = 1.0664e-03, PNorm = 174.9047, GNorm = 0.0363, lr_0 = 1.1844e-04
Loss = 2.2293e-03, PNorm = 174.9071, GNorm = 0.0846, lr_0 = 1.1835e-04
Loss = 1.2113e-03, PNorm = 174.9095, GNorm = 0.1675, lr_0 = 1.1827e-04
Loss = 8.8834e-04, PNorm = 174.9119, GNorm = 0.1219, lr_0 = 1.1819e-04
Loss = 1.3067e-03, PNorm = 174.9137, GNorm = 0.0534, lr_0 = 1.1811e-04
Loss = 2.2703e-03, PNorm = 174.9162, GNorm = 0.1192, lr_0 = 1.1803e-04
Loss = 1.4903e-03, PNorm = 174.9189, GNorm = 0.2270, lr_0 = 1.1795e-04
Loss = 7.2078e-04, PNorm = 174.9208, GNorm = 0.0339, lr_0 = 1.1787e-04
Validation mae = 0.278390
Epoch 28
Loss = 9.7776e-04, PNorm = 174.9227, GNorm = 0.0386, lr_0 = 1.1779e-04
Loss = 7.0756e-04, PNorm = 174.9229, GNorm = 0.1534, lr_0 = 1.1771e-04
Loss = 1.8702e-03, PNorm = 174.9242, GNorm = 0.0785, lr_0 = 1.1763e-04
Loss = 1.0943e-03, PNorm = 174.9262, GNorm = 0.1083, lr_0 = 1.1755e-04
Loss = 7.4653e-04, PNorm = 174.9279, GNorm = 0.0808, lr_0 = 1.1747e-04
Loss = 1.5505e-03, PNorm = 174.9288, GNorm = 0.1049, lr_0 = 1.1739e-04
Loss = 1.5831e-03, PNorm = 174.9300, GNorm = 0.4071, lr_0 = 1.1730e-04
Loss = 7.2294e-04, PNorm = 174.9318, GNorm = 0.1533, lr_0 = 1.1722e-04
Loss = 1.1315e-03, PNorm = 174.9331, GNorm = 0.1202, lr_0 = 1.1714e-04
Loss = 1.0943e-03, PNorm = 174.9339, GNorm = 0.0899, lr_0 = 1.1706e-04
Loss = 7.0933e-04, PNorm = 174.9342, GNorm = 0.0244, lr_0 = 1.1698e-04
Loss = 7.5136e-04, PNorm = 174.9352, GNorm = 0.0814, lr_0 = 1.1690e-04
Loss = 1.3605e-03, PNorm = 174.9360, GNorm = 0.0564, lr_0 = 1.1682e-04
Loss = 2.1412e-03, PNorm = 174.9360, GNorm = 0.0875, lr_0 = 1.1674e-04
Loss = 1.0248e-03, PNorm = 174.9363, GNorm = 0.0718, lr_0 = 1.1666e-04
Loss = 1.5559e-03, PNorm = 174.9361, GNorm = 0.0514, lr_0 = 1.1658e-04
Loss = 2.6404e-03, PNorm = 174.9379, GNorm = 0.1574, lr_0 = 1.1650e-04
Loss = 1.9570e-03, PNorm = 174.9396, GNorm = 0.0753, lr_0 = 1.1642e-04
Loss = 1.8232e-03, PNorm = 174.9415, GNorm = 0.0356, lr_0 = 1.1634e-04
Loss = 7.0755e-04, PNorm = 174.9424, GNorm = 0.1218, lr_0 = 1.1626e-04
Loss = 6.4557e-04, PNorm = 174.9432, GNorm = 0.0971, lr_0 = 1.1618e-04
Loss = 2.1239e-03, PNorm = 174.9432, GNorm = 0.7313, lr_0 = 1.1611e-04
Loss = 9.3384e-04, PNorm = 174.9436, GNorm = 0.1509, lr_0 = 1.1603e-04
Loss = 1.4741e-03, PNorm = 174.9455, GNorm = 0.0565, lr_0 = 1.1595e-04
Loss = 1.5637e-03, PNorm = 174.9465, GNorm = 0.0760, lr_0 = 1.1587e-04
Loss = 5.7978e-04, PNorm = 174.9465, GNorm = 0.1296, lr_0 = 1.1579e-04
Loss = 6.6666e-04, PNorm = 174.9462, GNorm = 0.0328, lr_0 = 1.1571e-04
Loss = 1.0826e-03, PNorm = 174.9458, GNorm = 0.1179, lr_0 = 1.1563e-04
Loss = 6.5398e-04, PNorm = 174.9475, GNorm = 0.1162, lr_0 = 1.1555e-04
Loss = 4.0314e-03, PNorm = 174.9481, GNorm = 0.3391, lr_0 = 1.1547e-04
Loss = 1.5014e-03, PNorm = 174.9485, GNorm = 0.0291, lr_0 = 1.1539e-04
Loss = 2.0016e-03, PNorm = 174.9503, GNorm = 0.0696, lr_0 = 1.1531e-04
Loss = 6.8713e-04, PNorm = 174.9526, GNorm = 0.0462, lr_0 = 1.1523e-04
Loss = 5.0670e-04, PNorm = 174.9537, GNorm = 0.1032, lr_0 = 1.1515e-04
Loss = 9.4656e-04, PNorm = 174.9561, GNorm = 0.2625, lr_0 = 1.1508e-04
Loss = 2.9369e-03, PNorm = 174.9598, GNorm = 0.1055, lr_0 = 1.1500e-04
Loss = 1.3435e-03, PNorm = 174.9617, GNorm = 0.1508, lr_0 = 1.1492e-04
Loss = 5.2928e-04, PNorm = 174.9635, GNorm = 0.0926, lr_0 = 1.1484e-04
Loss = 8.9349e-04, PNorm = 174.9645, GNorm = 0.1568, lr_0 = 1.1476e-04
Loss = 1.4147e-03, PNorm = 174.9649, GNorm = 0.0453, lr_0 = 1.1468e-04
Loss = 1.4065e-03, PNorm = 174.9661, GNorm = 0.1740, lr_0 = 1.1460e-04
Loss = 1.3844e-03, PNorm = 174.9670, GNorm = 0.0255, lr_0 = 1.1452e-04
Loss = 2.8029e-03, PNorm = 174.9697, GNorm = 0.1322, lr_0 = 1.1445e-04
Loss = 8.5165e-04, PNorm = 174.9707, GNorm = 0.1177, lr_0 = 1.1437e-04
Loss = 5.6294e-04, PNorm = 174.9714, GNorm = 0.0505, lr_0 = 1.1429e-04
Loss = 8.7794e-04, PNorm = 174.9719, GNorm = 0.0586, lr_0 = 1.1421e-04
Loss = 6.3348e-04, PNorm = 174.9737, GNorm = 0.0503, lr_0 = 1.1413e-04
Loss = 9.0007e-04, PNorm = 174.9749, GNorm = 0.0807, lr_0 = 1.1405e-04
Loss = 7.7585e-04, PNorm = 174.9761, GNorm = 0.0972, lr_0 = 1.1398e-04
Loss = 1.0885e-03, PNorm = 174.9775, GNorm = 0.0878, lr_0 = 1.1390e-04
Loss = 1.5800e-03, PNorm = 174.9791, GNorm = 0.1582, lr_0 = 1.1382e-04
Loss = 1.1839e-03, PNorm = 174.9798, GNorm = 0.1285, lr_0 = 1.1374e-04
Loss = 1.8192e-03, PNorm = 174.9795, GNorm = 0.4118, lr_0 = 1.1366e-04
Loss = 1.7401e-03, PNorm = 174.9798, GNorm = 0.0908, lr_0 = 1.1359e-04
Loss = 1.7533e-03, PNorm = 174.9794, GNorm = 0.2303, lr_0 = 1.1351e-04
Loss = 1.9459e-03, PNorm = 174.9796, GNorm = 0.0396, lr_0 = 1.1343e-04
Loss = 2.6675e-03, PNorm = 174.9810, GNorm = 0.0948, lr_0 = 1.1335e-04
Loss = 1.9430e-03, PNorm = 174.9849, GNorm = 0.1307, lr_0 = 1.1328e-04
Loss = 7.5049e-04, PNorm = 174.9897, GNorm = 0.1249, lr_0 = 1.1320e-04
Loss = 1.5085e-03, PNorm = 174.9921, GNorm = 0.0445, lr_0 = 1.1312e-04
Loss = 1.0526e-03, PNorm = 174.9938, GNorm = 0.0931, lr_0 = 1.1304e-04
Loss = 1.5371e-03, PNorm = 174.9955, GNorm = 0.0883, lr_0 = 1.1297e-04
Loss = 1.0873e-03, PNorm = 174.9989, GNorm = 0.1255, lr_0 = 1.1289e-04
Loss = 1.4265e-03, PNorm = 174.9987, GNorm = 0.1501, lr_0 = 1.1281e-04
Loss = 7.0387e-04, PNorm = 174.9989, GNorm = 0.0352, lr_0 = 1.1273e-04
Loss = 6.9948e-04, PNorm = 174.9995, GNorm = 0.1406, lr_0 = 1.1266e-04
Loss = 7.3771e-04, PNorm = 175.0009, GNorm = 0.0287, lr_0 = 1.1258e-04
Loss = 6.3582e-04, PNorm = 175.0019, GNorm = 0.1283, lr_0 = 1.1250e-04
Loss = 2.4433e-03, PNorm = 175.0037, GNorm = 0.0891, lr_0 = 1.1243e-04
Loss = 1.7698e-03, PNorm = 175.0053, GNorm = 0.1617, lr_0 = 1.1235e-04
Loss = 6.5122e-04, PNorm = 175.0077, GNorm = 0.1190, lr_0 = 1.1227e-04
Loss = 1.0025e-03, PNorm = 175.0079, GNorm = 0.0625, lr_0 = 1.1219e-04
Loss = 5.8863e-04, PNorm = 175.0082, GNorm = 0.1673, lr_0 = 1.1212e-04
Loss = 2.1700e-03, PNorm = 175.0090, GNorm = 0.1376, lr_0 = 1.1204e-04
Loss = 5.8385e-04, PNorm = 175.0106, GNorm = 0.0712, lr_0 = 1.1196e-04
Loss = 7.4822e-04, PNorm = 175.0109, GNorm = 0.1621, lr_0 = 1.1189e-04
Loss = 1.2252e-03, PNorm = 175.0121, GNorm = 0.0421, lr_0 = 1.1181e-04
Loss = 1.0914e-03, PNorm = 175.0125, GNorm = 0.0291, lr_0 = 1.1173e-04
Loss = 3.3602e-03, PNorm = 175.0135, GNorm = 0.1242, lr_0 = 1.1166e-04
Loss = 9.2527e-04, PNorm = 175.0154, GNorm = 0.0569, lr_0 = 1.1158e-04
Loss = 6.1203e-04, PNorm = 175.0170, GNorm = 0.1270, lr_0 = 1.1150e-04
Loss = 1.0037e-03, PNorm = 175.0189, GNorm = 0.1725, lr_0 = 1.1143e-04
Loss = 2.2838e-03, PNorm = 175.0211, GNorm = 0.1221, lr_0 = 1.1135e-04
Loss = 7.4338e-04, PNorm = 175.0226, GNorm = 0.0692, lr_0 = 1.1128e-04
Loss = 5.5077e-04, PNorm = 175.0245, GNorm = 0.0440, lr_0 = 1.1120e-04
Loss = 1.2257e-03, PNorm = 175.0251, GNorm = 0.0994, lr_0 = 1.1112e-04
Loss = 8.7840e-04, PNorm = 175.0277, GNorm = 0.0471, lr_0 = 1.1105e-04
Loss = 1.9728e-03, PNorm = 175.0305, GNorm = 0.1670, lr_0 = 1.1097e-04
Loss = 7.2845e-04, PNorm = 175.0332, GNorm = 0.0590, lr_0 = 1.1089e-04
Loss = 6.2920e-04, PNorm = 175.0342, GNorm = 0.0549, lr_0 = 1.1082e-04
Loss = 1.7796e-03, PNorm = 175.0354, GNorm = 0.0403, lr_0 = 1.1074e-04
Loss = 6.3531e-04, PNorm = 175.0357, GNorm = 0.0292, lr_0 = 1.1067e-04
Loss = 9.9087e-04, PNorm = 175.0374, GNorm = 0.1008, lr_0 = 1.1059e-04
Loss = 6.9842e-04, PNorm = 175.0396, GNorm = 0.1082, lr_0 = 1.1052e-04
Loss = 8.7487e-04, PNorm = 175.0400, GNorm = 0.1163, lr_0 = 1.1044e-04
Loss = 6.5883e-04, PNorm = 175.0410, GNorm = 0.0751, lr_0 = 1.1036e-04
Loss = 1.3596e-03, PNorm = 175.0410, GNorm = 0.0723, lr_0 = 1.1029e-04
Loss = 9.2813e-04, PNorm = 175.0420, GNorm = 0.0747, lr_0 = 1.1021e-04
Loss = 1.9396e-03, PNorm = 175.0435, GNorm = 0.0827, lr_0 = 1.1014e-04
Loss = 2.1641e-03, PNorm = 175.0459, GNorm = 0.1813, lr_0 = 1.1006e-04
Loss = 9.0792e-04, PNorm = 175.0478, GNorm = 0.1432, lr_0 = 1.0999e-04
Loss = 8.6349e-04, PNorm = 175.0510, GNorm = 0.0682, lr_0 = 1.0991e-04
Loss = 1.5330e-03, PNorm = 175.0537, GNorm = 0.0707, lr_0 = 1.0984e-04
Loss = 3.1004e-03, PNorm = 175.0543, GNorm = 0.0347, lr_0 = 1.0976e-04
Loss = 1.3357e-03, PNorm = 175.0552, GNorm = 0.1416, lr_0 = 1.0969e-04
Loss = 1.5126e-03, PNorm = 175.0562, GNorm = 0.2089, lr_0 = 1.0961e-04
Loss = 1.5637e-03, PNorm = 175.0583, GNorm = 0.0921, lr_0 = 1.0954e-04
Loss = 1.3993e-03, PNorm = 175.0591, GNorm = 0.1153, lr_0 = 1.0946e-04
Loss = 2.9182e-03, PNorm = 175.0620, GNorm = 0.1414, lr_0 = 1.0939e-04
Loss = 8.8428e-04, PNorm = 175.0637, GNorm = 0.0500, lr_0 = 1.0931e-04
Loss = 9.6758e-04, PNorm = 175.0661, GNorm = 0.1109, lr_0 = 1.0924e-04
Loss = 7.0366e-04, PNorm = 175.0663, GNorm = 0.0350, lr_0 = 1.0916e-04
Loss = 7.0517e-04, PNorm = 175.0680, GNorm = 0.1257, lr_0 = 1.0909e-04
Loss = 6.7919e-04, PNorm = 175.0680, GNorm = 0.0629, lr_0 = 1.0901e-04
Loss = 1.4643e-03, PNorm = 175.0687, GNorm = 0.0419, lr_0 = 1.0894e-04
Loss = 1.3511e-03, PNorm = 175.0705, GNorm = 0.1295, lr_0 = 1.0886e-04
Loss = 2.1690e-03, PNorm = 175.0731, GNorm = 0.0548, lr_0 = 1.0879e-04
Loss = 1.3285e-03, PNorm = 175.0760, GNorm = 0.0731, lr_0 = 1.0871e-04
Loss = 2.4875e-03, PNorm = 175.0778, GNorm = 0.1524, lr_0 = 1.0864e-04
Loss = 9.4060e-04, PNorm = 175.0806, GNorm = 0.0997, lr_0 = 1.0856e-04
Validation mae = 0.278469
Epoch 29
Loss = 5.8599e-04, PNorm = 175.0825, GNorm = 0.0845, lr_0 = 1.0849e-04
Loss = 1.7223e-03, PNorm = 175.0826, GNorm = 0.0615, lr_0 = 1.0841e-04
Loss = 5.1264e-04, PNorm = 175.0828, GNorm = 0.0716, lr_0 = 1.0834e-04
Loss = 1.5349e-03, PNorm = 175.0828, GNorm = 0.0795, lr_0 = 1.0827e-04
Loss = 1.8667e-03, PNorm = 175.0846, GNorm = 0.1122, lr_0 = 1.0819e-04
Loss = 5.8155e-04, PNorm = 175.0851, GNorm = 0.1207, lr_0 = 1.0812e-04
Loss = 5.5330e-04, PNorm = 175.0860, GNorm = 0.0296, lr_0 = 1.0804e-04
Loss = 8.8953e-04, PNorm = 175.0862, GNorm = 0.0483, lr_0 = 1.0797e-04
Loss = 1.2398e-03, PNorm = 175.0877, GNorm = 0.0336, lr_0 = 1.0790e-04
Loss = 9.1324e-04, PNorm = 175.0894, GNorm = 0.0246, lr_0 = 1.0782e-04
Loss = 9.8291e-04, PNorm = 175.0906, GNorm = 0.0783, lr_0 = 1.0775e-04
Loss = 1.8235e-03, PNorm = 175.0910, GNorm = 0.1294, lr_0 = 1.0767e-04
Loss = 6.9084e-04, PNorm = 175.0916, GNorm = 0.0714, lr_0 = 1.0760e-04
Loss = 1.0863e-03, PNorm = 175.0913, GNorm = 0.0524, lr_0 = 1.0753e-04
Loss = 9.3934e-04, PNorm = 175.0914, GNorm = 0.0539, lr_0 = 1.0745e-04
Loss = 6.4875e-04, PNorm = 175.0919, GNorm = 0.0552, lr_0 = 1.0738e-04
Loss = 5.0306e-04, PNorm = 175.0922, GNorm = 0.1099, lr_0 = 1.0731e-04
Loss = 1.6808e-03, PNorm = 175.0941, GNorm = 0.0680, lr_0 = 1.0723e-04
Loss = 7.4459e-04, PNorm = 175.0956, GNorm = 0.0646, lr_0 = 1.0716e-04
Loss = 2.4287e-03, PNorm = 175.0974, GNorm = 0.0199, lr_0 = 1.0709e-04
Loss = 1.6091e-03, PNorm = 175.0994, GNorm = 0.0398, lr_0 = 1.0701e-04
Loss = 1.1308e-03, PNorm = 175.1012, GNorm = 0.1550, lr_0 = 1.0694e-04
Loss = 2.0337e-03, PNorm = 175.1018, GNorm = 0.0508, lr_0 = 1.0687e-04
Loss = 1.4161e-03, PNorm = 175.1027, GNorm = 0.0557, lr_0 = 1.0679e-04
Loss = 9.0677e-04, PNorm = 175.1031, GNorm = 0.1887, lr_0 = 1.0672e-04
Loss = 2.2736e-03, PNorm = 175.1046, GNorm = 0.0722, lr_0 = 1.0665e-04
Loss = 7.2990e-04, PNorm = 175.1050, GNorm = 0.1160, lr_0 = 1.0657e-04
Loss = 4.6278e-04, PNorm = 175.1049, GNorm = 0.0262, lr_0 = 1.0650e-04
Loss = 4.8477e-04, PNorm = 175.1065, GNorm = 0.0867, lr_0 = 1.0643e-04
Loss = 5.6710e-04, PNorm = 175.1075, GNorm = 0.0807, lr_0 = 1.0635e-04
Loss = 6.8276e-04, PNorm = 175.1090, GNorm = 0.0584, lr_0 = 1.0628e-04
Loss = 1.4062e-03, PNorm = 175.1102, GNorm = 0.0390, lr_0 = 1.0621e-04
Loss = 6.0713e-04, PNorm = 175.1098, GNorm = 0.0280, lr_0 = 1.0614e-04
Loss = 1.6245e-03, PNorm = 175.1095, GNorm = 0.0868, lr_0 = 1.0606e-04
Loss = 8.4379e-04, PNorm = 175.1092, GNorm = 0.0422, lr_0 = 1.0599e-04
Loss = 1.2925e-03, PNorm = 175.1098, GNorm = 0.1548, lr_0 = 1.0592e-04
Loss = 7.0560e-04, PNorm = 175.1103, GNorm = 0.0716, lr_0 = 1.0585e-04
Loss = 5.2228e-04, PNorm = 175.1114, GNorm = 0.0493, lr_0 = 1.0577e-04
Loss = 1.5243e-03, PNorm = 175.1131, GNorm = 0.0866, lr_0 = 1.0570e-04
Loss = 1.0207e-03, PNorm = 175.1150, GNorm = 0.0620, lr_0 = 1.0563e-04
Loss = 1.8124e-03, PNorm = 175.1166, GNorm = 0.1403, lr_0 = 1.0556e-04
Loss = 1.3509e-03, PNorm = 175.1182, GNorm = 0.1043, lr_0 = 1.0548e-04
Loss = 7.5932e-04, PNorm = 175.1182, GNorm = 0.0734, lr_0 = 1.0541e-04
Loss = 1.8058e-03, PNorm = 175.1201, GNorm = 0.1581, lr_0 = 1.0534e-04
Loss = 9.1394e-04, PNorm = 175.1208, GNorm = 0.1758, lr_0 = 1.0527e-04
Loss = 1.3882e-03, PNorm = 175.1223, GNorm = 0.0552, lr_0 = 1.0519e-04
Loss = 1.3144e-03, PNorm = 175.1242, GNorm = 0.1009, lr_0 = 1.0512e-04
Loss = 1.2460e-03, PNorm = 175.1268, GNorm = 0.0849, lr_0 = 1.0505e-04
Loss = 2.4830e-03, PNorm = 175.1297, GNorm = 0.3586, lr_0 = 1.0498e-04
Loss = 1.0518e-03, PNorm = 175.1309, GNorm = 0.1049, lr_0 = 1.0491e-04
Loss = 6.4476e-04, PNorm = 175.1318, GNorm = 0.0655, lr_0 = 1.0483e-04
Loss = 6.7710e-04, PNorm = 175.1329, GNorm = 0.0726, lr_0 = 1.0476e-04
Loss = 5.5247e-04, PNorm = 175.1333, GNorm = 0.0915, lr_0 = 1.0469e-04
Loss = 2.7655e-03, PNorm = 175.1348, GNorm = 0.0968, lr_0 = 1.0462e-04
Loss = 1.7649e-03, PNorm = 175.1360, GNorm = 0.0587, lr_0 = 1.0455e-04
Loss = 1.0379e-03, PNorm = 175.1369, GNorm = 0.0453, lr_0 = 1.0448e-04
Loss = 9.1301e-04, PNorm = 175.1377, GNorm = 0.1398, lr_0 = 1.0440e-04
Loss = 8.9108e-04, PNorm = 175.1395, GNorm = 0.0402, lr_0 = 1.0433e-04
Loss = 7.5877e-04, PNorm = 175.1406, GNorm = 0.0468, lr_0 = 1.0426e-04
Loss = 1.0346e-03, PNorm = 175.1416, GNorm = 0.0578, lr_0 = 1.0419e-04
Loss = 6.3346e-04, PNorm = 175.1425, GNorm = 0.1345, lr_0 = 1.0412e-04
Loss = 4.6444e-04, PNorm = 175.1440, GNorm = 0.0478, lr_0 = 1.0405e-04
Loss = 7.9786e-04, PNorm = 175.1453, GNorm = 0.0982, lr_0 = 1.0398e-04
Loss = 4.6823e-04, PNorm = 175.1455, GNorm = 0.1254, lr_0 = 1.0391e-04
Loss = 2.1413e-03, PNorm = 175.1464, GNorm = 0.0739, lr_0 = 1.0383e-04
Loss = 7.6504e-04, PNorm = 175.1475, GNorm = 0.1015, lr_0 = 1.0376e-04
Loss = 1.2958e-03, PNorm = 175.1485, GNorm = 0.0832, lr_0 = 1.0369e-04
Loss = 3.1256e-03, PNorm = 175.1492, GNorm = 0.0851, lr_0 = 1.0362e-04
Loss = 1.8619e-03, PNorm = 175.1496, GNorm = 0.2945, lr_0 = 1.0355e-04
Loss = 1.4576e-03, PNorm = 175.1505, GNorm = 0.0174, lr_0 = 1.0348e-04
Loss = 5.2546e-04, PNorm = 175.1522, GNorm = 0.0382, lr_0 = 1.0341e-04
Loss = 5.0626e-04, PNorm = 175.1532, GNorm = 0.0570, lr_0 = 1.0334e-04
Loss = 1.3497e-03, PNorm = 175.1543, GNorm = 0.2755, lr_0 = 1.0327e-04
Loss = 1.4409e-03, PNorm = 175.1566, GNorm = 0.0515, lr_0 = 1.0320e-04
Loss = 5.5509e-04, PNorm = 175.1584, GNorm = 0.1832, lr_0 = 1.0312e-04
Loss = 3.7495e-03, PNorm = 175.1592, GNorm = 0.1451, lr_0 = 1.0305e-04
Loss = 6.1703e-04, PNorm = 175.1599, GNorm = 0.1403, lr_0 = 1.0298e-04
Loss = 1.8577e-03, PNorm = 175.1618, GNorm = 0.2376, lr_0 = 1.0291e-04
Loss = 1.8192e-03, PNorm = 175.1615, GNorm = 0.1882, lr_0 = 1.0284e-04
Loss = 5.1676e-04, PNorm = 175.1635, GNorm = 0.2337, lr_0 = 1.0277e-04
Loss = 2.1507e-03, PNorm = 175.1648, GNorm = 0.0661, lr_0 = 1.0270e-04
Loss = 1.0697e-03, PNorm = 175.1664, GNorm = 0.1118, lr_0 = 1.0263e-04
Loss = 1.0107e-03, PNorm = 175.1669, GNorm = 0.0751, lr_0 = 1.0256e-04
Loss = 3.8360e-04, PNorm = 175.1682, GNorm = 0.0695, lr_0 = 1.0249e-04
Loss = 1.1322e-03, PNorm = 175.1701, GNorm = 0.2321, lr_0 = 1.0242e-04
Loss = 7.1842e-04, PNorm = 175.1729, GNorm = 0.0646, lr_0 = 1.0235e-04
Loss = 5.0562e-04, PNorm = 175.1746, GNorm = 0.0283, lr_0 = 1.0228e-04
Loss = 9.6711e-04, PNorm = 175.1765, GNorm = 0.0188, lr_0 = 1.0221e-04
Loss = 1.2768e-03, PNorm = 175.1775, GNorm = 0.1460, lr_0 = 1.0214e-04
Loss = 3.1686e-03, PNorm = 175.1777, GNorm = 0.0467, lr_0 = 1.0207e-04
Loss = 6.1719e-04, PNorm = 175.1780, GNorm = 0.0895, lr_0 = 1.0200e-04
Loss = 1.7045e-03, PNorm = 175.1792, GNorm = 0.0399, lr_0 = 1.0193e-04
Loss = 5.2736e-04, PNorm = 175.1807, GNorm = 0.0545, lr_0 = 1.0186e-04
Loss = 3.0956e-03, PNorm = 175.1816, GNorm = 0.1716, lr_0 = 1.0179e-04
Loss = 7.7513e-04, PNorm = 175.1808, GNorm = 0.0977, lr_0 = 1.0172e-04
Loss = 8.5596e-04, PNorm = 175.1822, GNorm = 0.1425, lr_0 = 1.0165e-04
Loss = 1.1779e-03, PNorm = 175.1839, GNorm = 0.0747, lr_0 = 1.0158e-04
Loss = 1.3052e-03, PNorm = 175.1858, GNorm = 0.0641, lr_0 = 1.0151e-04
Loss = 8.5839e-04, PNorm = 175.1867, GNorm = 0.0664, lr_0 = 1.0144e-04
Loss = 9.9234e-04, PNorm = 175.1886, GNorm = 0.0694, lr_0 = 1.0137e-04
Loss = 1.5514e-03, PNorm = 175.1897, GNorm = 0.0484, lr_0 = 1.0130e-04
Loss = 1.1542e-03, PNorm = 175.1928, GNorm = 0.0934, lr_0 = 1.0123e-04
Loss = 8.6134e-04, PNorm = 175.1936, GNorm = 0.0919, lr_0 = 1.0116e-04
Loss = 4.2277e-04, PNorm = 175.1946, GNorm = 0.1011, lr_0 = 1.0110e-04
Loss = 7.9552e-04, PNorm = 175.1956, GNorm = 0.0926, lr_0 = 1.0103e-04
Loss = 1.7718e-03, PNorm = 175.1969, GNorm = 0.0353, lr_0 = 1.0096e-04
Loss = 1.2410e-03, PNorm = 175.1967, GNorm = 0.0706, lr_0 = 1.0089e-04
Loss = 1.5116e-03, PNorm = 175.1968, GNorm = 0.2602, lr_0 = 1.0082e-04
Loss = 2.5293e-03, PNorm = 175.1975, GNorm = 0.0845, lr_0 = 1.0075e-04
Loss = 1.1269e-03, PNorm = 175.1989, GNorm = 0.1801, lr_0 = 1.0068e-04
Loss = 9.9185e-04, PNorm = 175.2004, GNorm = 0.0954, lr_0 = 1.0061e-04
Loss = 6.1810e-04, PNorm = 175.2024, GNorm = 0.0281, lr_0 = 1.0054e-04
Loss = 8.3504e-04, PNorm = 175.2029, GNorm = 0.1128, lr_0 = 1.0047e-04
Loss = 1.3998e-03, PNorm = 175.2039, GNorm = 0.1011, lr_0 = 1.0041e-04
Loss = 1.2159e-03, PNorm = 175.2062, GNorm = 0.0384, lr_0 = 1.0034e-04
Loss = 9.7157e-04, PNorm = 175.2088, GNorm = 0.0942, lr_0 = 1.0027e-04
Loss = 4.7149e-04, PNorm = 175.2108, GNorm = 0.0894, lr_0 = 1.0020e-04
Loss = 7.7294e-04, PNorm = 175.2128, GNorm = 0.0692, lr_0 = 1.0013e-04
Loss = 1.5400e-03, PNorm = 175.2133, GNorm = 0.2434, lr_0 = 1.0006e-04
Loss = 1.3121e-03, PNorm = 175.2142, GNorm = 0.0488, lr_0 = 1.0000e-04
Validation mae = 0.278255
Model 0 best validation mae = 0.278255 on epoch 29
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.274740
Ensemble test mae = 0.274740
Fold 8
Splitting data with seed 8
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=2200, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=2200, out_features=2200, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=2200, out_features=1, bias=True)
  )
)
Number of parameters = 9,352,201
Moving model to cuda
Epoch 0
Loss = 8.7100e-01, PNorm = 65.7666, GNorm = 1.9236, lr_0 = 1.0413e-04
Loss = 6.7608e-01, PNorm = 65.7804, GNorm = 1.9804, lr_0 = 1.0788e-04
Loss = 6.2170e-01, PNorm = 65.7949, GNorm = 2.0633, lr_0 = 1.1163e-04
Loss = 4.7185e-01, PNorm = 65.8073, GNorm = 2.3347, lr_0 = 1.1537e-04
Loss = 3.7566e-01, PNorm = 65.8179, GNorm = 2.1612, lr_0 = 1.1913e-04
Loss = 4.3937e-01, PNorm = 65.8263, GNorm = 2.0520, lr_0 = 1.2287e-04
Loss = 3.7267e-01, PNorm = 65.8329, GNorm = 2.0730, lr_0 = 1.2663e-04
Loss = 3.7862e-01, PNorm = 65.8418, GNorm = 2.8877, lr_0 = 1.3038e-04
Loss = 4.3105e-01, PNorm = 65.8511, GNorm = 3.0511, lr_0 = 1.3413e-04
Loss = 4.5213e-01, PNorm = 65.8617, GNorm = 1.7289, lr_0 = 1.3788e-04
Loss = 4.0271e-01, PNorm = 65.8737, GNorm = 3.2680, lr_0 = 1.4163e-04
Loss = 3.9853e-01, PNorm = 65.8833, GNorm = 2.3506, lr_0 = 1.4537e-04
Loss = 3.7586e-01, PNorm = 65.8935, GNorm = 2.1610, lr_0 = 1.4913e-04
Loss = 3.7245e-01, PNorm = 65.9037, GNorm = 2.1788, lr_0 = 1.5288e-04
Loss = 3.5192e-01, PNorm = 65.9138, GNorm = 1.6227, lr_0 = 1.5662e-04
Loss = 3.7834e-01, PNorm = 65.9253, GNorm = 2.0138, lr_0 = 1.6038e-04
Loss = 3.5296e-01, PNorm = 65.9366, GNorm = 2.1659, lr_0 = 1.6412e-04
Loss = 3.4169e-01, PNorm = 65.9487, GNorm = 1.8498, lr_0 = 1.6788e-04
Loss = 3.3875e-01, PNorm = 65.9605, GNorm = 2.2284, lr_0 = 1.7163e-04
Loss = 3.2504e-01, PNorm = 65.9715, GNorm = 1.8655, lr_0 = 1.7538e-04
Loss = 3.7785e-01, PNorm = 65.9830, GNorm = 1.6195, lr_0 = 1.7913e-04
Loss = 3.5463e-01, PNorm = 65.9972, GNorm = 1.3552, lr_0 = 1.8288e-04
Loss = 3.4499e-01, PNorm = 66.0116, GNorm = 1.8533, lr_0 = 1.8662e-04
Loss = 3.5709e-01, PNorm = 66.0228, GNorm = 1.5900, lr_0 = 1.9038e-04
Loss = 3.4251e-01, PNorm = 66.0405, GNorm = 1.7055, lr_0 = 1.9413e-04
Loss = 3.2149e-01, PNorm = 66.0569, GNorm = 1.8247, lr_0 = 1.9788e-04
Loss = 2.9564e-01, PNorm = 66.0692, GNorm = 2.1663, lr_0 = 2.0163e-04
Loss = 3.1382e-01, PNorm = 66.0849, GNorm = 2.3814, lr_0 = 2.0537e-04
Loss = 3.6014e-01, PNorm = 66.1025, GNorm = 2.6745, lr_0 = 2.0913e-04
Loss = 3.7055e-01, PNorm = 66.1194, GNorm = 1.9102, lr_0 = 2.1288e-04
Loss = 3.4281e-01, PNorm = 66.1411, GNorm = 3.3543, lr_0 = 2.1663e-04
Loss = 3.6658e-01, PNorm = 66.1576, GNorm = 3.1528, lr_0 = 2.2038e-04
Loss = 3.0067e-01, PNorm = 66.1734, GNorm = 2.3586, lr_0 = 2.2412e-04
Loss = 3.8260e-01, PNorm = 66.1931, GNorm = 2.0732, lr_0 = 2.2787e-04
Loss = 2.8773e-01, PNorm = 66.2117, GNorm = 1.2652, lr_0 = 2.3163e-04
Loss = 3.2500e-01, PNorm = 66.2293, GNorm = 1.6787, lr_0 = 2.3538e-04
Loss = 3.2238e-01, PNorm = 66.2516, GNorm = 2.3715, lr_0 = 2.3913e-04
Loss = 2.9655e-01, PNorm = 66.2717, GNorm = 1.4826, lr_0 = 2.4288e-04
Loss = 3.2254e-01, PNorm = 66.2899, GNorm = 1.8607, lr_0 = 2.4662e-04
Loss = 3.0820e-01, PNorm = 66.3117, GNorm = 2.3560, lr_0 = 2.5038e-04
Loss = 2.7738e-01, PNorm = 66.3320, GNorm = 1.4680, lr_0 = 2.5413e-04
Loss = 3.3784e-01, PNorm = 66.3513, GNorm = 2.0889, lr_0 = 2.5788e-04
Loss = 3.0621e-01, PNorm = 66.3744, GNorm = 1.5363, lr_0 = 2.6163e-04
Loss = 2.7600e-01, PNorm = 66.3924, GNorm = 1.4881, lr_0 = 2.6537e-04
Loss = 2.9097e-01, PNorm = 66.4155, GNorm = 1.5326, lr_0 = 2.6912e-04
Loss = 2.7487e-01, PNorm = 66.4403, GNorm = 1.2530, lr_0 = 2.7288e-04
Loss = 2.6780e-01, PNorm = 66.4602, GNorm = 1.2282, lr_0 = 2.7663e-04
Loss = 3.1367e-01, PNorm = 66.4835, GNorm = 1.6758, lr_0 = 2.8038e-04
Loss = 2.9739e-01, PNorm = 66.5102, GNorm = 0.9407, lr_0 = 2.8413e-04
Loss = 2.8240e-01, PNorm = 66.5342, GNorm = 1.5567, lr_0 = 2.8787e-04
Loss = 2.4805e-01, PNorm = 66.5590, GNorm = 1.3087, lr_0 = 2.9163e-04
Loss = 2.8928e-01, PNorm = 66.5790, GNorm = 1.3081, lr_0 = 2.9538e-04
Loss = 2.7717e-01, PNorm = 66.6047, GNorm = 1.9731, lr_0 = 2.9913e-04
Loss = 2.7089e-01, PNorm = 66.6293, GNorm = 1.2693, lr_0 = 3.0288e-04
Loss = 3.0453e-01, PNorm = 66.6562, GNorm = 1.1980, lr_0 = 3.0662e-04
Loss = 2.6446e-01, PNorm = 66.6832, GNorm = 2.4577, lr_0 = 3.1037e-04
Loss = 2.7881e-01, PNorm = 66.7113, GNorm = 1.8161, lr_0 = 3.1413e-04
Loss = 2.9029e-01, PNorm = 66.7392, GNorm = 2.1186, lr_0 = 3.1788e-04
Loss = 2.9443e-01, PNorm = 66.7660, GNorm = 1.5551, lr_0 = 3.2163e-04
Loss = 2.9384e-01, PNorm = 66.8020, GNorm = 1.5152, lr_0 = 3.2538e-04
Loss = 2.9191e-01, PNorm = 66.8342, GNorm = 1.2137, lr_0 = 3.2912e-04
Loss = 2.6892e-01, PNorm = 66.8677, GNorm = 1.3986, lr_0 = 3.3288e-04
Loss = 2.7390e-01, PNorm = 66.8958, GNorm = 1.2145, lr_0 = 3.3663e-04
Loss = 2.7732e-01, PNorm = 66.9296, GNorm = 1.7541, lr_0 = 3.4038e-04
Loss = 2.9341e-01, PNorm = 66.9632, GNorm = 1.0093, lr_0 = 3.4413e-04
Loss = 2.6883e-01, PNorm = 66.9964, GNorm = 0.9948, lr_0 = 3.4787e-04
Loss = 2.7517e-01, PNorm = 67.0291, GNorm = 1.1882, lr_0 = 3.5162e-04
Loss = 2.8487e-01, PNorm = 67.0614, GNorm = 1.2121, lr_0 = 3.5538e-04
Loss = 2.8822e-01, PNorm = 67.0949, GNorm = 0.9770, lr_0 = 3.5913e-04
Loss = 2.9921e-01, PNorm = 67.1332, GNorm = 1.0191, lr_0 = 3.6288e-04
Loss = 2.5640e-01, PNorm = 67.1639, GNorm = 1.3096, lr_0 = 3.6662e-04
Loss = 2.7847e-01, PNorm = 67.1957, GNorm = 0.9385, lr_0 = 3.7037e-04
Loss = 2.6238e-01, PNorm = 67.2296, GNorm = 0.9939, lr_0 = 3.7413e-04
Loss = 2.8085e-01, PNorm = 67.2573, GNorm = 1.7145, lr_0 = 3.7788e-04
Loss = 3.4499e-01, PNorm = 67.2986, GNorm = 1.0372, lr_0 = 3.8163e-04
Loss = 3.1480e-01, PNorm = 67.3363, GNorm = 1.8504, lr_0 = 3.8537e-04
Loss = 3.0455e-01, PNorm = 67.3754, GNorm = 0.8660, lr_0 = 3.8912e-04
Loss = 2.5548e-01, PNorm = 67.4159, GNorm = 1.0600, lr_0 = 3.9287e-04
Loss = 3.0558e-01, PNorm = 67.4534, GNorm = 2.0304, lr_0 = 3.9663e-04
Loss = 2.6843e-01, PNorm = 67.4905, GNorm = 1.0926, lr_0 = 4.0038e-04
Loss = 2.5574e-01, PNorm = 67.5333, GNorm = 1.1750, lr_0 = 4.0413e-04
Loss = 2.3399e-01, PNorm = 67.5648, GNorm = 1.1297, lr_0 = 4.0787e-04
Loss = 2.7453e-01, PNorm = 67.5994, GNorm = 1.0882, lr_0 = 4.1162e-04
Loss = 2.7325e-01, PNorm = 67.6358, GNorm = 1.1560, lr_0 = 4.1537e-04
Loss = 2.7324e-01, PNorm = 67.6820, GNorm = 1.4904, lr_0 = 4.1913e-04
Loss = 3.0124e-01, PNorm = 67.7228, GNorm = 1.2747, lr_0 = 4.2288e-04
Loss = 2.4759e-01, PNorm = 67.7668, GNorm = 1.1197, lr_0 = 4.2662e-04
Loss = 2.6269e-01, PNorm = 67.8051, GNorm = 1.0097, lr_0 = 4.3037e-04
Loss = 2.4265e-01, PNorm = 67.8451, GNorm = 1.2111, lr_0 = 4.3412e-04
Loss = 2.3546e-01, PNorm = 67.8871, GNorm = 1.1251, lr_0 = 4.3788e-04
Loss = 2.7153e-01, PNorm = 67.9325, GNorm = 1.0753, lr_0 = 4.4163e-04
Loss = 2.2867e-01, PNorm = 67.9757, GNorm = 1.1861, lr_0 = 4.4538e-04
Loss = 2.4793e-01, PNorm = 68.0226, GNorm = 1.3172, lr_0 = 4.4912e-04
Loss = 2.5990e-01, PNorm = 68.0582, GNorm = 1.1934, lr_0 = 4.5287e-04
Loss = 2.6421e-01, PNorm = 68.1083, GNorm = 1.0747, lr_0 = 4.5662e-04
Loss = 2.7444e-01, PNorm = 68.1530, GNorm = 0.9833, lr_0 = 4.6038e-04
Loss = 2.5264e-01, PNorm = 68.1939, GNorm = 1.0519, lr_0 = 4.6413e-04
Loss = 2.7271e-01, PNorm = 68.2430, GNorm = 1.0967, lr_0 = 4.6787e-04
Loss = 2.4442e-01, PNorm = 68.2929, GNorm = 1.0506, lr_0 = 4.7162e-04
Loss = 2.6184e-01, PNorm = 68.3328, GNorm = 0.9548, lr_0 = 4.7537e-04
Loss = 2.5908e-01, PNorm = 68.3855, GNorm = 0.9008, lr_0 = 4.7913e-04
Loss = 2.6920e-01, PNorm = 68.4340, GNorm = 1.4408, lr_0 = 4.8288e-04
Loss = 2.6483e-01, PNorm = 68.4839, GNorm = 1.0020, lr_0 = 4.8663e-04
Loss = 2.5978e-01, PNorm = 68.5386, GNorm = 1.1179, lr_0 = 4.9038e-04
Loss = 2.5576e-01, PNorm = 68.5935, GNorm = 1.1026, lr_0 = 4.9412e-04
Loss = 2.6315e-01, PNorm = 68.6468, GNorm = 1.0464, lr_0 = 4.9788e-04
Loss = 2.8891e-01, PNorm = 68.7010, GNorm = 1.3417, lr_0 = 5.0163e-04
Loss = 2.6095e-01, PNorm = 68.7623, GNorm = 1.0105, lr_0 = 5.0538e-04
Loss = 2.7041e-01, PNorm = 68.8124, GNorm = 1.0125, lr_0 = 5.0913e-04
Loss = 2.8154e-01, PNorm = 68.8748, GNorm = 1.4905, lr_0 = 5.1287e-04
Loss = 2.3881e-01, PNorm = 68.9322, GNorm = 0.9739, lr_0 = 5.1663e-04
Loss = 2.5670e-01, PNorm = 68.9831, GNorm = 1.0943, lr_0 = 5.2038e-04
Loss = 2.6487e-01, PNorm = 69.0477, GNorm = 1.4696, lr_0 = 5.2413e-04
Loss = 2.3860e-01, PNorm = 69.1053, GNorm = 1.2232, lr_0 = 5.2788e-04
Loss = 2.7845e-01, PNorm = 69.1676, GNorm = 1.0347, lr_0 = 5.3162e-04
Loss = 3.0006e-01, PNorm = 69.2284, GNorm = 1.3120, lr_0 = 5.3538e-04
Loss = 2.5304e-01, PNorm = 69.3056, GNorm = 0.7338, lr_0 = 5.3912e-04
Loss = 2.5659e-01, PNorm = 69.3740, GNorm = 1.1227, lr_0 = 5.4288e-04
Loss = 2.5875e-01, PNorm = 69.4398, GNorm = 1.0175, lr_0 = 5.4663e-04
Loss = 2.7229e-01, PNorm = 69.5027, GNorm = 0.8689, lr_0 = 5.5038e-04
Validation mae = 0.316100
Epoch 1
Loss = 1.7151e-01, PNorm = 69.5663, GNorm = 0.8150, lr_0 = 5.5413e-04
Loss = 1.7226e-01, PNorm = 69.6224, GNorm = 0.6644, lr_0 = 5.5787e-04
Loss = 1.8948e-01, PNorm = 69.6693, GNorm = 0.9767, lr_0 = 5.6163e-04
Loss = 1.9096e-01, PNorm = 69.7221, GNorm = 1.0318, lr_0 = 5.6538e-04
Loss = 1.6914e-01, PNorm = 69.7817, GNorm = 1.0962, lr_0 = 5.6913e-04
Loss = 1.6299e-01, PNorm = 69.8304, GNorm = 0.7542, lr_0 = 5.7288e-04
Loss = 1.5053e-01, PNorm = 69.8923, GNorm = 0.6854, lr_0 = 5.7662e-04
Loss = 1.7527e-01, PNorm = 69.9604, GNorm = 0.7300, lr_0 = 5.8038e-04
Loss = 1.7861e-01, PNorm = 70.0218, GNorm = 0.6502, lr_0 = 5.8413e-04
Loss = 1.6172e-01, PNorm = 70.0923, GNorm = 1.0120, lr_0 = 5.8788e-04
Loss = 1.5401e-01, PNorm = 70.1538, GNorm = 0.6685, lr_0 = 5.9163e-04
Loss = 1.5340e-01, PNorm = 70.2165, GNorm = 0.9107, lr_0 = 5.9538e-04
Loss = 1.7426e-01, PNorm = 70.2802, GNorm = 0.9452, lr_0 = 5.9913e-04
Loss = 1.6742e-01, PNorm = 70.3495, GNorm = 0.6064, lr_0 = 6.0288e-04
Loss = 1.6351e-01, PNorm = 70.4161, GNorm = 1.0517, lr_0 = 6.0663e-04
Loss = 1.6318e-01, PNorm = 70.4933, GNorm = 1.1638, lr_0 = 6.1038e-04
Loss = 1.8121e-01, PNorm = 70.5555, GNorm = 0.8483, lr_0 = 6.1413e-04
Loss = 1.6006e-01, PNorm = 70.6401, GNorm = 0.9324, lr_0 = 6.1788e-04
Loss = 1.8918e-01, PNorm = 70.7165, GNorm = 0.8102, lr_0 = 6.2163e-04
Loss = 1.7064e-01, PNorm = 70.8063, GNorm = 1.1127, lr_0 = 6.2538e-04
Loss = 1.6557e-01, PNorm = 70.8832, GNorm = 0.9579, lr_0 = 6.2913e-04
Loss = 1.9667e-01, PNorm = 70.9610, GNorm = 1.3245, lr_0 = 6.3288e-04
Loss = 1.5943e-01, PNorm = 71.0524, GNorm = 0.9710, lr_0 = 6.3663e-04
Loss = 1.6613e-01, PNorm = 71.1276, GNorm = 0.8927, lr_0 = 6.4038e-04
Loss = 1.7136e-01, PNorm = 71.2018, GNorm = 0.7953, lr_0 = 6.4413e-04
Loss = 1.6326e-01, PNorm = 71.2912, GNorm = 1.3224, lr_0 = 6.4788e-04
Loss = 1.8119e-01, PNorm = 71.3631, GNorm = 0.8304, lr_0 = 6.5163e-04
Loss = 1.8114e-01, PNorm = 71.4522, GNorm = 1.0686, lr_0 = 6.5538e-04
Loss = 1.5419e-01, PNorm = 71.5431, GNorm = 1.0208, lr_0 = 6.5913e-04
Loss = 2.0362e-01, PNorm = 71.6413, GNorm = 0.9006, lr_0 = 6.6288e-04
Loss = 1.7737e-01, PNorm = 71.7331, GNorm = 0.8324, lr_0 = 6.6663e-04
Loss = 2.0050e-01, PNorm = 71.8339, GNorm = 1.0020, lr_0 = 6.7038e-04
Loss = 1.8618e-01, PNorm = 71.9321, GNorm = 0.9604, lr_0 = 6.7413e-04
Loss = 1.7566e-01, PNorm = 72.0372, GNorm = 0.8257, lr_0 = 6.7788e-04
Loss = 1.8924e-01, PNorm = 72.1330, GNorm = 0.8065, lr_0 = 6.8163e-04
Loss = 2.1299e-01, PNorm = 72.2343, GNorm = 0.9466, lr_0 = 6.8538e-04
Loss = 1.9193e-01, PNorm = 72.3376, GNorm = 0.8618, lr_0 = 6.8913e-04
Loss = 1.8051e-01, PNorm = 72.4472, GNorm = 0.9650, lr_0 = 6.9288e-04
Loss = 1.8535e-01, PNorm = 72.5466, GNorm = 1.0913, lr_0 = 6.9663e-04
Loss = 2.1196e-01, PNorm = 72.6518, GNorm = 0.9405, lr_0 = 7.0038e-04
Loss = 1.9684e-01, PNorm = 72.7672, GNorm = 1.3571, lr_0 = 7.0413e-04
Loss = 1.7798e-01, PNorm = 72.8742, GNorm = 0.6979, lr_0 = 7.0788e-04
Loss = 1.9123e-01, PNorm = 72.9873, GNorm = 0.7263, lr_0 = 7.1163e-04
Loss = 2.1006e-01, PNorm = 73.0876, GNorm = 1.1976, lr_0 = 7.1538e-04
Loss = 2.1339e-01, PNorm = 73.1982, GNorm = 1.0136, lr_0 = 7.1913e-04
Loss = 2.0695e-01, PNorm = 73.3069, GNorm = 0.9924, lr_0 = 7.2288e-04
Loss = 1.9194e-01, PNorm = 73.4176, GNorm = 0.9890, lr_0 = 7.2663e-04
Loss = 2.0210e-01, PNorm = 73.5276, GNorm = 0.8571, lr_0 = 7.3038e-04
Loss = 2.1968e-01, PNorm = 73.6435, GNorm = 0.9898, lr_0 = 7.3413e-04
Loss = 2.2488e-01, PNorm = 73.7577, GNorm = 1.1664, lr_0 = 7.3788e-04
Loss = 1.9012e-01, PNorm = 73.8765, GNorm = 0.9620, lr_0 = 7.4163e-04
Loss = 1.8982e-01, PNorm = 73.9823, GNorm = 0.7623, lr_0 = 7.4538e-04
Loss = 1.8575e-01, PNorm = 74.0818, GNorm = 1.0503, lr_0 = 7.4913e-04
Loss = 1.8132e-01, PNorm = 74.1899, GNorm = 0.9632, lr_0 = 7.5288e-04
Loss = 2.1173e-01, PNorm = 74.2942, GNorm = 1.0495, lr_0 = 7.5663e-04
Loss = 2.1590e-01, PNorm = 74.4170, GNorm = 0.8910, lr_0 = 7.6038e-04
Loss = 1.8788e-01, PNorm = 74.5240, GNorm = 0.7732, lr_0 = 7.6413e-04
Loss = 1.9610e-01, PNorm = 74.6409, GNorm = 0.6690, lr_0 = 7.6788e-04
Loss = 2.2600e-01, PNorm = 74.7523, GNorm = 1.0229, lr_0 = 7.7163e-04
Loss = 2.0210e-01, PNorm = 74.8810, GNorm = 0.9330, lr_0 = 7.7538e-04
Loss = 1.9947e-01, PNorm = 74.9989, GNorm = 0.7005, lr_0 = 7.7913e-04
Loss = 2.0819e-01, PNorm = 75.1106, GNorm = 0.9391, lr_0 = 7.8288e-04
Loss = 2.1774e-01, PNorm = 75.2253, GNorm = 0.8849, lr_0 = 7.8663e-04
Loss = 2.2847e-01, PNorm = 75.3421, GNorm = 0.7643, lr_0 = 7.9038e-04
Loss = 1.9112e-01, PNorm = 75.4597, GNorm = 0.9250, lr_0 = 7.9413e-04
Loss = 2.1534e-01, PNorm = 75.5869, GNorm = 0.8321, lr_0 = 7.9788e-04
Loss = 1.9988e-01, PNorm = 75.6977, GNorm = 0.7498, lr_0 = 8.0163e-04
Loss = 2.0535e-01, PNorm = 75.8131, GNorm = 0.7727, lr_0 = 8.0538e-04
Loss = 2.0663e-01, PNorm = 75.9197, GNorm = 0.8614, lr_0 = 8.0913e-04
Loss = 2.3813e-01, PNorm = 76.0493, GNorm = 0.9597, lr_0 = 8.1288e-04
Loss = 2.3161e-01, PNorm = 76.1872, GNorm = 0.7537, lr_0 = 8.1663e-04
Loss = 1.8584e-01, PNorm = 76.3148, GNorm = 0.7145, lr_0 = 8.2038e-04
Loss = 2.2761e-01, PNorm = 76.4377, GNorm = 1.0319, lr_0 = 8.2413e-04
Loss = 2.2980e-01, PNorm = 76.5564, GNorm = 1.0386, lr_0 = 8.2788e-04
Loss = 2.3373e-01, PNorm = 76.6904, GNorm = 0.9968, lr_0 = 8.3163e-04
Loss = 1.9577e-01, PNorm = 76.8246, GNorm = 0.8425, lr_0 = 8.3538e-04
Loss = 1.8118e-01, PNorm = 76.9479, GNorm = 0.8173, lr_0 = 8.3913e-04
Loss = 2.0355e-01, PNorm = 77.0701, GNorm = 0.5516, lr_0 = 8.4288e-04
Loss = 1.9418e-01, PNorm = 77.1881, GNorm = 1.0585, lr_0 = 8.4663e-04
Loss = 2.0037e-01, PNorm = 77.3153, GNorm = 0.8467, lr_0 = 8.5038e-04
Loss = 1.8220e-01, PNorm = 77.4338, GNorm = 0.7473, lr_0 = 8.5413e-04
Loss = 2.2542e-01, PNorm = 77.5637, GNorm = 1.2680, lr_0 = 8.5788e-04
Loss = 2.0351e-01, PNorm = 77.7069, GNorm = 0.7010, lr_0 = 8.6163e-04
Loss = 2.4614e-01, PNorm = 77.8482, GNorm = 0.7122, lr_0 = 8.6538e-04
Loss = 1.9896e-01, PNorm = 78.0053, GNorm = 0.9853, lr_0 = 8.6913e-04
Loss = 2.0940e-01, PNorm = 78.1415, GNorm = 0.6157, lr_0 = 8.7288e-04
Loss = 2.0343e-01, PNorm = 78.2774, GNorm = 1.3239, lr_0 = 8.7663e-04
Loss = 2.0715e-01, PNorm = 78.4092, GNorm = 0.7805, lr_0 = 8.8038e-04
Loss = 2.0772e-01, PNorm = 78.5462, GNorm = 1.1587, lr_0 = 8.8413e-04
Loss = 1.9539e-01, PNorm = 78.6789, GNorm = 0.7774, lr_0 = 8.8788e-04
Loss = 1.7536e-01, PNorm = 78.8108, GNorm = 0.9848, lr_0 = 8.9163e-04
Loss = 1.9669e-01, PNorm = 78.9376, GNorm = 0.6723, lr_0 = 8.9538e-04
Loss = 1.8278e-01, PNorm = 79.0788, GNorm = 0.7460, lr_0 = 8.9913e-04
Loss = 2.3179e-01, PNorm = 79.2082, GNorm = 0.7404, lr_0 = 9.0288e-04
Loss = 2.0078e-01, PNorm = 79.3367, GNorm = 1.5984, lr_0 = 9.0663e-04
Loss = 2.1206e-01, PNorm = 79.4660, GNorm = 0.7751, lr_0 = 9.1038e-04
Loss = 2.1933e-01, PNorm = 79.5956, GNorm = 0.9223, lr_0 = 9.1413e-04
Loss = 2.1895e-01, PNorm = 79.7383, GNorm = 0.8688, lr_0 = 9.1788e-04
Loss = 2.1246e-01, PNorm = 79.8707, GNorm = 0.6050, lr_0 = 9.2163e-04
Loss = 1.9115e-01, PNorm = 80.0051, GNorm = 0.6746, lr_0 = 9.2538e-04
Loss = 2.2195e-01, PNorm = 80.1459, GNorm = 0.7125, lr_0 = 9.2913e-04
Loss = 1.9201e-01, PNorm = 80.2801, GNorm = 0.7407, lr_0 = 9.3288e-04
Loss = 1.9829e-01, PNorm = 80.4225, GNorm = 0.9014, lr_0 = 9.3663e-04
Loss = 2.0280e-01, PNorm = 80.5512, GNorm = 0.6489, lr_0 = 9.4038e-04
Loss = 2.0166e-01, PNorm = 80.6948, GNorm = 1.3706, lr_0 = 9.4413e-04
Loss = 1.9559e-01, PNorm = 80.8274, GNorm = 0.6314, lr_0 = 9.4788e-04
Loss = 1.8649e-01, PNorm = 80.9700, GNorm = 0.6260, lr_0 = 9.5163e-04
Loss = 1.8552e-01, PNorm = 81.0902, GNorm = 0.6116, lr_0 = 9.5538e-04
Loss = 1.8584e-01, PNorm = 81.2167, GNorm = 0.8268, lr_0 = 9.5913e-04
Loss = 2.1214e-01, PNorm = 81.3519, GNorm = 0.7255, lr_0 = 9.6288e-04
Loss = 2.2728e-01, PNorm = 81.4982, GNorm = 0.6554, lr_0 = 9.6663e-04
Loss = 1.9163e-01, PNorm = 81.6490, GNorm = 0.8254, lr_0 = 9.7038e-04
Loss = 2.2638e-01, PNorm = 81.7981, GNorm = 1.1648, lr_0 = 9.7413e-04
Loss = 2.3046e-01, PNorm = 81.9446, GNorm = 0.8550, lr_0 = 9.7788e-04
Loss = 2.0334e-01, PNorm = 82.1007, GNorm = 0.7548, lr_0 = 9.8163e-04
Loss = 1.8844e-01, PNorm = 82.2554, GNorm = 0.8448, lr_0 = 9.8537e-04
Loss = 2.2236e-01, PNorm = 82.4051, GNorm = 0.5923, lr_0 = 9.8912e-04
Loss = 2.4821e-01, PNorm = 82.5626, GNorm = 1.2517, lr_0 = 9.9288e-04
Loss = 2.0427e-01, PNorm = 82.7148, GNorm = 0.6899, lr_0 = 9.9663e-04
Loss = 2.0797e-01, PNorm = 82.8623, GNorm = 0.7652, lr_0 = 9.9993e-04
Validation mae = 0.308895
Epoch 2
Loss = 1.3928e-01, PNorm = 83.0191, GNorm = 0.7107, lr_0 = 9.9925e-04
Loss = 1.2365e-01, PNorm = 83.1533, GNorm = 0.6270, lr_0 = 9.9856e-04
Loss = 1.2785e-01, PNorm = 83.2646, GNorm = 0.5837, lr_0 = 9.9788e-04
Loss = 1.1756e-01, PNorm = 83.3706, GNorm = 0.5924, lr_0 = 9.9719e-04
Loss = 1.2084e-01, PNorm = 83.4774, GNorm = 0.7707, lr_0 = 9.9651e-04
Loss = 1.1669e-01, PNorm = 83.5795, GNorm = 0.5571, lr_0 = 9.9583e-04
Loss = 1.1939e-01, PNorm = 83.6903, GNorm = 0.6344, lr_0 = 9.9515e-04
Loss = 1.3879e-01, PNorm = 83.8024, GNorm = 0.8818, lr_0 = 9.9446e-04
Loss = 1.3355e-01, PNorm = 83.9254, GNorm = 0.5149, lr_0 = 9.9378e-04
Loss = 1.4480e-01, PNorm = 84.0368, GNorm = 0.7307, lr_0 = 9.9310e-04
Loss = 1.1714e-01, PNorm = 84.1592, GNorm = 0.5863, lr_0 = 9.9242e-04
Loss = 1.4682e-01, PNorm = 84.2666, GNorm = 0.5955, lr_0 = 9.9174e-04
Loss = 1.1597e-01, PNorm = 84.3877, GNorm = 0.4449, lr_0 = 9.9106e-04
Loss = 1.2046e-01, PNorm = 84.4945, GNorm = 0.7926, lr_0 = 9.9038e-04
Loss = 1.3169e-01, PNorm = 84.5991, GNorm = 0.5676, lr_0 = 9.8971e-04
Loss = 1.3673e-01, PNorm = 84.7086, GNorm = 0.9274, lr_0 = 9.8903e-04
Loss = 1.3731e-01, PNorm = 84.8312, GNorm = 0.9796, lr_0 = 9.8835e-04
Loss = 1.2453e-01, PNorm = 84.9539, GNorm = 0.7758, lr_0 = 9.8767e-04
Loss = 1.2287e-01, PNorm = 85.0651, GNorm = 0.5938, lr_0 = 9.8700e-04
Loss = 1.3044e-01, PNorm = 85.1833, GNorm = 0.9329, lr_0 = 9.8632e-04
Loss = 1.3706e-01, PNorm = 85.3027, GNorm = 0.5655, lr_0 = 9.8564e-04
Loss = 1.1492e-01, PNorm = 85.4291, GNorm = 0.7389, lr_0 = 9.8497e-04
Loss = 1.3227e-01, PNorm = 85.5644, GNorm = 0.7245, lr_0 = 9.8429e-04
Loss = 1.2318e-01, PNorm = 85.6945, GNorm = 0.8899, lr_0 = 9.8362e-04
Loss = 1.2792e-01, PNorm = 85.8349, GNorm = 0.5897, lr_0 = 9.8295e-04
Loss = 1.3195e-01, PNorm = 85.9713, GNorm = 0.5636, lr_0 = 9.8227e-04
Loss = 1.1086e-01, PNorm = 86.0897, GNorm = 1.2758, lr_0 = 9.8160e-04
Loss = 1.2326e-01, PNorm = 86.2075, GNorm = 0.8519, lr_0 = 9.8093e-04
Loss = 1.3523e-01, PNorm = 86.3243, GNorm = 0.8145, lr_0 = 9.8026e-04
Loss = 1.2397e-01, PNorm = 86.4418, GNorm = 0.7959, lr_0 = 9.7958e-04
Loss = 1.2128e-01, PNorm = 86.5625, GNorm = 0.6707, lr_0 = 9.7891e-04
Loss = 1.3805e-01, PNorm = 86.6838, GNorm = 0.7618, lr_0 = 9.7824e-04
Loss = 1.4382e-01, PNorm = 86.7951, GNorm = 0.5613, lr_0 = 9.7757e-04
Loss = 1.2553e-01, PNorm = 86.9285, GNorm = 0.7013, lr_0 = 9.7690e-04
Loss = 1.2824e-01, PNorm = 87.0528, GNorm = 0.6061, lr_0 = 9.7623e-04
Loss = 1.3756e-01, PNorm = 87.1809, GNorm = 0.9859, lr_0 = 9.7556e-04
Loss = 1.4137e-01, PNorm = 87.3217, GNorm = 0.6432, lr_0 = 9.7490e-04
Loss = 1.3190e-01, PNorm = 87.4517, GNorm = 0.4247, lr_0 = 9.7423e-04
Loss = 1.0943e-01, PNorm = 87.5800, GNorm = 0.5507, lr_0 = 9.7356e-04
Loss = 1.2914e-01, PNorm = 87.7002, GNorm = 0.6632, lr_0 = 9.7289e-04
Loss = 1.3933e-01, PNorm = 87.8177, GNorm = 0.5783, lr_0 = 9.7223e-04
Loss = 1.4540e-01, PNorm = 87.9615, GNorm = 0.7820, lr_0 = 9.7156e-04
Loss = 1.3208e-01, PNorm = 88.0992, GNorm = 0.6315, lr_0 = 9.7090e-04
Loss = 1.2324e-01, PNorm = 88.2315, GNorm = 0.8070, lr_0 = 9.7023e-04
Loss = 1.3618e-01, PNorm = 88.3821, GNorm = 0.8783, lr_0 = 9.6957e-04
Loss = 1.4454e-01, PNorm = 88.5146, GNorm = 0.4968, lr_0 = 9.6890e-04
Loss = 1.3093e-01, PNorm = 88.6595, GNorm = 0.7094, lr_0 = 9.6824e-04
Loss = 1.2846e-01, PNorm = 88.7815, GNorm = 0.5821, lr_0 = 9.6757e-04
Loss = 1.3651e-01, PNorm = 88.9078, GNorm = 0.9572, lr_0 = 9.6691e-04
Loss = 1.2312e-01, PNorm = 89.0399, GNorm = 0.7368, lr_0 = 9.6625e-04
Loss = 1.1872e-01, PNorm = 89.1662, GNorm = 1.0363, lr_0 = 9.6559e-04
Loss = 1.3345e-01, PNorm = 89.2888, GNorm = 0.8198, lr_0 = 9.6493e-04
Loss = 1.2703e-01, PNorm = 89.4236, GNorm = 1.2620, lr_0 = 9.6427e-04
Loss = 1.3850e-01, PNorm = 89.5395, GNorm = 1.0073, lr_0 = 9.6360e-04
Loss = 1.2687e-01, PNorm = 89.6824, GNorm = 0.8534, lr_0 = 9.6294e-04
Loss = 1.2962e-01, PNorm = 89.8132, GNorm = 0.5102, lr_0 = 9.6228e-04
Loss = 1.3970e-01, PNorm = 89.9549, GNorm = 1.0110, lr_0 = 9.6163e-04
Loss = 1.4352e-01, PNorm = 90.0961, GNorm = 0.8622, lr_0 = 9.6097e-04
Loss = 1.4922e-01, PNorm = 90.2430, GNorm = 0.5306, lr_0 = 9.6031e-04
Loss = 1.3243e-01, PNorm = 90.3868, GNorm = 0.4517, lr_0 = 9.5965e-04
Loss = 1.3152e-01, PNorm = 90.5268, GNorm = 0.5695, lr_0 = 9.5899e-04
Loss = 1.3355e-01, PNorm = 90.6409, GNorm = 0.7793, lr_0 = 9.5834e-04
Loss = 1.3254e-01, PNorm = 90.7729, GNorm = 0.5759, lr_0 = 9.5768e-04
Loss = 1.4132e-01, PNorm = 90.9106, GNorm = 1.1203, lr_0 = 9.5702e-04
Loss = 1.4498e-01, PNorm = 91.0564, GNorm = 0.6114, lr_0 = 9.5637e-04
Loss = 1.4166e-01, PNorm = 91.1871, GNorm = 0.7146, lr_0 = 9.5571e-04
Loss = 1.3470e-01, PNorm = 91.3305, GNorm = 0.6568, lr_0 = 9.5506e-04
Loss = 1.2987e-01, PNorm = 91.4718, GNorm = 0.6372, lr_0 = 9.5440e-04
Loss = 1.4348e-01, PNorm = 91.6084, GNorm = 0.9109, lr_0 = 9.5375e-04
Loss = 1.3040e-01, PNorm = 91.7491, GNorm = 0.8349, lr_0 = 9.5310e-04
Loss = 1.5547e-01, PNorm = 91.8718, GNorm = 0.9261, lr_0 = 9.5244e-04
Loss = 1.4022e-01, PNorm = 92.0176, GNorm = 0.7342, lr_0 = 9.5179e-04
Loss = 1.4637e-01, PNorm = 92.1431, GNorm = 0.7737, lr_0 = 9.5114e-04
Loss = 1.6094e-01, PNorm = 92.2893, GNorm = 1.1846, lr_0 = 9.5049e-04
Loss = 1.6731e-01, PNorm = 92.4239, GNorm = 0.9170, lr_0 = 9.4984e-04
Loss = 1.3199e-01, PNorm = 92.5845, GNorm = 1.0220, lr_0 = 9.4919e-04
Loss = 1.3170e-01, PNorm = 92.7085, GNorm = 0.7273, lr_0 = 9.4854e-04
Loss = 1.4437e-01, PNorm = 92.8421, GNorm = 0.7588, lr_0 = 9.4789e-04
Loss = 1.3753e-01, PNorm = 92.9684, GNorm = 1.0663, lr_0 = 9.4724e-04
Loss = 1.3605e-01, PNorm = 93.1025, GNorm = 0.6943, lr_0 = 9.4659e-04
Loss = 1.5250e-01, PNorm = 93.2424, GNorm = 0.5799, lr_0 = 9.4594e-04
Loss = 1.3606e-01, PNorm = 93.3719, GNorm = 0.5610, lr_0 = 9.4529e-04
Loss = 1.5467e-01, PNorm = 93.5063, GNorm = 0.6824, lr_0 = 9.4464e-04
Loss = 1.5678e-01, PNorm = 93.6090, GNorm = 0.8509, lr_0 = 9.4400e-04
Loss = 1.4818e-01, PNorm = 93.7291, GNorm = 0.5996, lr_0 = 9.4335e-04
Loss = 1.4055e-01, PNorm = 93.8442, GNorm = 0.6036, lr_0 = 9.4270e-04
Loss = 1.3058e-01, PNorm = 93.9700, GNorm = 1.3068, lr_0 = 9.4206e-04
Loss = 1.4558e-01, PNorm = 94.0726, GNorm = 0.7745, lr_0 = 9.4141e-04
Loss = 1.3556e-01, PNorm = 94.1971, GNorm = 0.7744, lr_0 = 9.4077e-04
Loss = 1.4896e-01, PNorm = 94.3032, GNorm = 0.7217, lr_0 = 9.4012e-04
Loss = 1.3604e-01, PNorm = 94.4276, GNorm = 0.4360, lr_0 = 9.3948e-04
Loss = 1.5045e-01, PNorm = 94.5446, GNorm = 0.5606, lr_0 = 9.3884e-04
Loss = 1.7112e-01, PNorm = 94.6701, GNorm = 0.5566, lr_0 = 9.3819e-04
Loss = 1.6492e-01, PNorm = 94.7996, GNorm = 1.0646, lr_0 = 9.3755e-04
Loss = 1.5536e-01, PNorm = 94.9430, GNorm = 0.6538, lr_0 = 9.3691e-04
Loss = 1.4175e-01, PNorm = 95.0649, GNorm = 0.7044, lr_0 = 9.3627e-04
Loss = 1.4131e-01, PNorm = 95.2002, GNorm = 0.4434, lr_0 = 9.3562e-04
Loss = 1.3465e-01, PNorm = 95.3247, GNorm = 0.9675, lr_0 = 9.3498e-04
Loss = 1.3633e-01, PNorm = 95.4578, GNorm = 0.6780, lr_0 = 9.3434e-04
Loss = 1.4515e-01, PNorm = 95.5842, GNorm = 1.1353, lr_0 = 9.3370e-04
Loss = 1.5521e-01, PNorm = 95.7190, GNorm = 0.8595, lr_0 = 9.3306e-04
Loss = 1.4226e-01, PNorm = 95.8497, GNorm = 0.6877, lr_0 = 9.3242e-04
Loss = 1.4310e-01, PNorm = 95.9746, GNorm = 0.7869, lr_0 = 9.3178e-04
Loss = 1.4096e-01, PNorm = 96.1027, GNorm = 0.5101, lr_0 = 9.3115e-04
Loss = 1.4269e-01, PNorm = 96.2374, GNorm = 0.6623, lr_0 = 9.3051e-04
Loss = 1.5148e-01, PNorm = 96.3534, GNorm = 1.0587, lr_0 = 9.2987e-04
Loss = 1.5072e-01, PNorm = 96.4926, GNorm = 0.6957, lr_0 = 9.2923e-04
Loss = 1.3272e-01, PNorm = 96.6146, GNorm = 0.4945, lr_0 = 9.2860e-04
Loss = 1.4782e-01, PNorm = 96.7445, GNorm = 0.7412, lr_0 = 9.2796e-04
Loss = 1.4140e-01, PNorm = 96.8706, GNorm = 0.7650, lr_0 = 9.2733e-04
Loss = 1.4775e-01, PNorm = 97.0156, GNorm = 0.9237, lr_0 = 9.2669e-04
Loss = 1.4775e-01, PNorm = 97.1398, GNorm = 0.9157, lr_0 = 9.2606e-04
Loss = 1.6254e-01, PNorm = 97.2814, GNorm = 0.9399, lr_0 = 9.2542e-04
Loss = 1.4213e-01, PNorm = 97.4236, GNorm = 1.1039, lr_0 = 9.2479e-04
Loss = 1.3086e-01, PNorm = 97.5604, GNorm = 0.8205, lr_0 = 9.2415e-04
Loss = 1.3233e-01, PNorm = 97.6874, GNorm = 0.5714, lr_0 = 9.2352e-04
Loss = 1.6582e-01, PNorm = 97.8248, GNorm = 1.0080, lr_0 = 9.2289e-04
Loss = 1.4558e-01, PNorm = 97.9572, GNorm = 0.6646, lr_0 = 9.2226e-04
Loss = 1.6877e-01, PNorm = 98.0950, GNorm = 0.7275, lr_0 = 9.2162e-04
Loss = 1.2545e-01, PNorm = 98.2249, GNorm = 0.6486, lr_0 = 9.2099e-04
Validation mae = 0.297055
Epoch 3
Loss = 9.8138e-02, PNorm = 98.3525, GNorm = 0.8685, lr_0 = 9.2036e-04
Loss = 1.0065e-01, PNorm = 98.4603, GNorm = 0.4872, lr_0 = 9.1973e-04
Loss = 8.6605e-02, PNorm = 98.5561, GNorm = 0.4042, lr_0 = 9.1910e-04
Loss = 8.5694e-02, PNorm = 98.6462, GNorm = 0.4370, lr_0 = 9.1847e-04
Loss = 7.1991e-02, PNorm = 98.7292, GNorm = 0.5171, lr_0 = 9.1784e-04
Loss = 8.5430e-02, PNorm = 98.8076, GNorm = 0.7948, lr_0 = 9.1721e-04
Loss = 8.1841e-02, PNorm = 98.8905, GNorm = 0.7494, lr_0 = 9.1658e-04
Loss = 7.6834e-02, PNorm = 98.9580, GNorm = 0.3760, lr_0 = 9.1596e-04
Loss = 8.8023e-02, PNorm = 99.0417, GNorm = 0.5009, lr_0 = 9.1533e-04
Loss = 7.9033e-02, PNorm = 99.1166, GNorm = 0.6849, lr_0 = 9.1470e-04
Loss = 7.7873e-02, PNorm = 99.1991, GNorm = 0.5636, lr_0 = 9.1408e-04
Loss = 7.6299e-02, PNorm = 99.2792, GNorm = 0.6805, lr_0 = 9.1345e-04
Loss = 8.7199e-02, PNorm = 99.3592, GNorm = 0.4312, lr_0 = 9.1282e-04
Loss = 8.9247e-02, PNorm = 99.4346, GNorm = 0.8291, lr_0 = 9.1220e-04
Loss = 7.5031e-02, PNorm = 99.5162, GNorm = 0.9014, lr_0 = 9.1157e-04
Loss = 8.6617e-02, PNorm = 99.5884, GNorm = 0.7272, lr_0 = 9.1095e-04
Loss = 7.5525e-02, PNorm = 99.6766, GNorm = 0.4289, lr_0 = 9.1032e-04
Loss = 9.5991e-02, PNorm = 99.7505, GNorm = 0.5200, lr_0 = 9.0970e-04
Loss = 7.6010e-02, PNorm = 99.8430, GNorm = 0.4095, lr_0 = 9.0908e-04
Loss = 8.1456e-02, PNorm = 99.9185, GNorm = 0.5402, lr_0 = 9.0846e-04
Loss = 7.7428e-02, PNorm = 100.0088, GNorm = 0.4703, lr_0 = 9.0783e-04
Loss = 8.3280e-02, PNorm = 100.0840, GNorm = 0.7469, lr_0 = 9.0721e-04
Loss = 6.8267e-02, PNorm = 100.1654, GNorm = 0.6304, lr_0 = 9.0659e-04
Loss = 8.6546e-02, PNorm = 100.2442, GNorm = 0.6633, lr_0 = 9.0597e-04
Loss = 7.4750e-02, PNorm = 100.3285, GNorm = 0.3976, lr_0 = 9.0535e-04
Loss = 8.5637e-02, PNorm = 100.4180, GNorm = 0.6061, lr_0 = 9.0473e-04
Loss = 8.5555e-02, PNorm = 100.4859, GNorm = 0.5793, lr_0 = 9.0411e-04
Loss = 8.7197e-02, PNorm = 100.5788, GNorm = 0.6151, lr_0 = 9.0349e-04
Loss = 7.9849e-02, PNorm = 100.6558, GNorm = 0.4081, lr_0 = 9.0287e-04
Loss = 8.2886e-02, PNorm = 100.7444, GNorm = 0.3779, lr_0 = 9.0225e-04
Loss = 7.9266e-02, PNorm = 100.8117, GNorm = 0.4136, lr_0 = 9.0163e-04
Loss = 8.8884e-02, PNorm = 100.9086, GNorm = 0.7291, lr_0 = 9.0102e-04
Loss = 6.8938e-02, PNorm = 100.9881, GNorm = 0.3786, lr_0 = 9.0040e-04
Loss = 7.9032e-02, PNorm = 101.0613, GNorm = 0.5719, lr_0 = 8.9978e-04
Loss = 8.5070e-02, PNorm = 101.1412, GNorm = 0.5906, lr_0 = 8.9916e-04
Loss = 8.3613e-02, PNorm = 101.2330, GNorm = 0.4638, lr_0 = 8.9855e-04
Loss = 7.5811e-02, PNorm = 101.3157, GNorm = 0.6750, lr_0 = 8.9793e-04
Loss = 9.5174e-02, PNorm = 101.4090, GNorm = 0.7972, lr_0 = 8.9732e-04
Loss = 7.5406e-02, PNorm = 101.4918, GNorm = 0.7560, lr_0 = 8.9670e-04
Loss = 7.6823e-02, PNorm = 101.5803, GNorm = 0.4480, lr_0 = 8.9609e-04
Loss = 8.5237e-02, PNorm = 101.6739, GNorm = 0.5826, lr_0 = 8.9548e-04
Loss = 8.4906e-02, PNorm = 101.7571, GNorm = 0.5087, lr_0 = 8.9486e-04
Loss = 9.0246e-02, PNorm = 101.8446, GNorm = 1.0150, lr_0 = 8.9425e-04
Loss = 9.3236e-02, PNorm = 101.9462, GNorm = 0.4921, lr_0 = 8.9364e-04
Loss = 7.7572e-02, PNorm = 102.0313, GNorm = 1.1835, lr_0 = 8.9302e-04
Loss = 9.0973e-02, PNorm = 102.1307, GNorm = 0.5594, lr_0 = 8.9241e-04
Loss = 7.8470e-02, PNorm = 102.2142, GNorm = 0.7094, lr_0 = 8.9180e-04
Loss = 9.2204e-02, PNorm = 102.3137, GNorm = 0.5420, lr_0 = 8.9119e-04
Loss = 8.8168e-02, PNorm = 102.3984, GNorm = 0.3973, lr_0 = 8.9058e-04
Loss = 9.2454e-02, PNorm = 102.5090, GNorm = 0.4508, lr_0 = 8.8997e-04
Loss = 9.0320e-02, PNorm = 102.6025, GNorm = 0.3793, lr_0 = 8.8936e-04
Loss = 7.4900e-02, PNorm = 102.7081, GNorm = 0.5186, lr_0 = 8.8875e-04
Loss = 8.2471e-02, PNorm = 102.7932, GNorm = 0.4931, lr_0 = 8.8814e-04
Loss = 8.5376e-02, PNorm = 102.8888, GNorm = 0.4314, lr_0 = 8.8753e-04
Loss = 8.8417e-02, PNorm = 102.9685, GNorm = 0.7765, lr_0 = 8.8693e-04
Loss = 8.5689e-02, PNorm = 103.0628, GNorm = 0.5322, lr_0 = 8.8632e-04
Loss = 9.5872e-02, PNorm = 103.1554, GNorm = 0.5997, lr_0 = 8.8571e-04
Loss = 8.8273e-02, PNorm = 103.2585, GNorm = 0.4795, lr_0 = 8.8510e-04
Loss = 7.9925e-02, PNorm = 103.3502, GNorm = 0.4790, lr_0 = 8.8450e-04
Loss = 8.4031e-02, PNorm = 103.4443, GNorm = 0.4308, lr_0 = 8.8389e-04
Loss = 9.1618e-02, PNorm = 103.5406, GNorm = 0.7124, lr_0 = 8.8329e-04
Loss = 8.4700e-02, PNorm = 103.6401, GNorm = 0.8425, lr_0 = 8.8268e-04
Loss = 9.6312e-02, PNorm = 103.7302, GNorm = 0.5280, lr_0 = 8.8208e-04
Loss = 9.1825e-02, PNorm = 103.8382, GNorm = 0.9811, lr_0 = 8.8147e-04
Loss = 9.1581e-02, PNorm = 103.9430, GNorm = 0.4670, lr_0 = 8.8087e-04
Loss = 9.3053e-02, PNorm = 104.0537, GNorm = 0.3964, lr_0 = 8.8026e-04
Loss = 9.6442e-02, PNorm = 104.1646, GNorm = 0.5185, lr_0 = 8.7966e-04
Loss = 8.7622e-02, PNorm = 104.2748, GNorm = 0.3625, lr_0 = 8.7906e-04
Loss = 8.9162e-02, PNorm = 104.3680, GNorm = 0.9114, lr_0 = 8.7846e-04
Loss = 7.7979e-02, PNorm = 104.4655, GNorm = 0.3978, lr_0 = 8.7785e-04
Loss = 8.8734e-02, PNorm = 104.5627, GNorm = 0.6505, lr_0 = 8.7725e-04
Loss = 8.8169e-02, PNorm = 104.6614, GNorm = 0.5132, lr_0 = 8.7665e-04
Loss = 9.0341e-02, PNorm = 104.7604, GNorm = 0.6396, lr_0 = 8.7605e-04
Loss = 7.9961e-02, PNorm = 104.8514, GNorm = 0.4728, lr_0 = 8.7545e-04
Loss = 8.2599e-02, PNorm = 104.9529, GNorm = 0.4513, lr_0 = 8.7485e-04
Loss = 8.4164e-02, PNorm = 105.0523, GNorm = 1.4242, lr_0 = 8.7425e-04
Loss = 9.5886e-02, PNorm = 105.1554, GNorm = 1.0937, lr_0 = 8.7365e-04
Loss = 8.7755e-02, PNorm = 105.2508, GNorm = 0.8558, lr_0 = 8.7306e-04
Loss = 9.1320e-02, PNorm = 105.3546, GNorm = 0.7670, lr_0 = 8.7246e-04
Loss = 9.4962e-02, PNorm = 105.4532, GNorm = 1.5895, lr_0 = 8.7186e-04
Loss = 9.0055e-02, PNorm = 105.5592, GNorm = 0.6611, lr_0 = 8.7126e-04
Loss = 8.7692e-02, PNorm = 105.6517, GNorm = 0.4928, lr_0 = 8.7067e-04
Loss = 9.9518e-02, PNorm = 105.7562, GNorm = 0.4895, lr_0 = 8.7007e-04
Loss = 8.9994e-02, PNorm = 105.8621, GNorm = 0.4531, lr_0 = 8.6947e-04
Loss = 8.7523e-02, PNorm = 105.9717, GNorm = 1.0869, lr_0 = 8.6888e-04
Loss = 9.6001e-02, PNorm = 106.0707, GNorm = 0.6665, lr_0 = 8.6828e-04
Loss = 8.4468e-02, PNorm = 106.1755, GNorm = 0.4139, lr_0 = 8.6769e-04
Loss = 9.8643e-02, PNorm = 106.2852, GNorm = 1.5193, lr_0 = 8.6709e-04
Loss = 1.0586e-01, PNorm = 106.3897, GNorm = 0.7186, lr_0 = 8.6650e-04
Loss = 9.3140e-02, PNorm = 106.5170, GNorm = 0.6134, lr_0 = 8.6590e-04
Loss = 9.2329e-02, PNorm = 106.6317, GNorm = 0.4900, lr_0 = 8.6531e-04
Loss = 7.9859e-02, PNorm = 106.7348, GNorm = 0.8225, lr_0 = 8.6472e-04
Loss = 1.0115e-01, PNorm = 106.8358, GNorm = 0.6030, lr_0 = 8.6413e-04
Loss = 1.0413e-01, PNorm = 106.9426, GNorm = 0.9325, lr_0 = 8.6353e-04
Loss = 1.0312e-01, PNorm = 107.0459, GNorm = 0.6033, lr_0 = 8.6294e-04
Loss = 9.9993e-02, PNorm = 107.1649, GNorm = 0.4655, lr_0 = 8.6235e-04
Loss = 1.0540e-01, PNorm = 107.2790, GNorm = 0.7238, lr_0 = 8.6176e-04
Loss = 9.3884e-02, PNorm = 107.3972, GNorm = 0.4249, lr_0 = 8.6117e-04
Loss = 1.0202e-01, PNorm = 107.5041, GNorm = 0.3648, lr_0 = 8.6058e-04
Loss = 8.8247e-02, PNorm = 107.6100, GNorm = 0.7356, lr_0 = 8.5999e-04
Loss = 8.7962e-02, PNorm = 107.7059, GNorm = 0.9054, lr_0 = 8.5940e-04
Loss = 9.1159e-02, PNorm = 107.8079, GNorm = 0.4606, lr_0 = 8.5881e-04
Loss = 8.8175e-02, PNorm = 107.9061, GNorm = 0.7582, lr_0 = 8.5823e-04
Loss = 1.0608e-01, PNorm = 108.0117, GNorm = 0.3693, lr_0 = 8.5764e-04
Loss = 1.0126e-01, PNorm = 108.1268, GNorm = 0.9359, lr_0 = 8.5705e-04
Loss = 9.2760e-02, PNorm = 108.2443, GNorm = 0.4485, lr_0 = 8.5646e-04
Loss = 8.5359e-02, PNorm = 108.3454, GNorm = 0.9054, lr_0 = 8.5588e-04
Loss = 9.1917e-02, PNorm = 108.4433, GNorm = 0.4384, lr_0 = 8.5529e-04
Loss = 9.9451e-02, PNorm = 108.5373, GNorm = 0.6007, lr_0 = 8.5470e-04
Loss = 9.9148e-02, PNorm = 108.6489, GNorm = 0.9606, lr_0 = 8.5412e-04
Loss = 9.7891e-02, PNorm = 108.7514, GNorm = 0.4203, lr_0 = 8.5353e-04
Loss = 9.3809e-02, PNorm = 108.8621, GNorm = 0.7305, lr_0 = 8.5295e-04
Loss = 1.1495e-01, PNorm = 108.9709, GNorm = 1.0431, lr_0 = 8.5236e-04
Loss = 9.2508e-02, PNorm = 109.1026, GNorm = 0.7495, lr_0 = 8.5178e-04
Loss = 9.9894e-02, PNorm = 109.2123, GNorm = 0.8893, lr_0 = 8.5120e-04
Loss = 8.8048e-02, PNorm = 109.3299, GNorm = 0.8024, lr_0 = 8.5061e-04
Loss = 1.0116e-01, PNorm = 109.4328, GNorm = 1.4223, lr_0 = 8.5003e-04
Loss = 1.0436e-01, PNorm = 109.5547, GNorm = 0.5694, lr_0 = 8.4945e-04
Loss = 9.3888e-02, PNorm = 109.6582, GNorm = 0.4846, lr_0 = 8.4887e-04
Loss = 9.1617e-02, PNorm = 109.7679, GNorm = 0.4915, lr_0 = 8.4828e-04
Validation mae = 0.291626
Epoch 4
Loss = 6.3170e-02, PNorm = 109.8472, GNorm = 0.4256, lr_0 = 8.4770e-04
Loss = 6.0557e-02, PNorm = 109.9187, GNorm = 0.4363, lr_0 = 8.4712e-04
Loss = 6.9766e-02, PNorm = 109.9847, GNorm = 0.6001, lr_0 = 8.4654e-04
Loss = 6.0768e-02, PNorm = 110.0534, GNorm = 0.5703, lr_0 = 8.4596e-04
Loss = 5.5290e-02, PNorm = 110.1242, GNorm = 0.6813, lr_0 = 8.4538e-04
Loss = 6.6389e-02, PNorm = 110.1921, GNorm = 0.2870, lr_0 = 8.4480e-04
Loss = 4.4981e-02, PNorm = 110.2579, GNorm = 0.2648, lr_0 = 8.4423e-04
Loss = 5.0897e-02, PNorm = 110.3217, GNorm = 0.4170, lr_0 = 8.4365e-04
Loss = 5.8929e-02, PNorm = 110.3919, GNorm = 0.6711, lr_0 = 8.4307e-04
Loss = 5.4899e-02, PNorm = 110.4498, GNorm = 0.5346, lr_0 = 8.4249e-04
Loss = 5.2502e-02, PNorm = 110.5188, GNorm = 0.3237, lr_0 = 8.4191e-04
Loss = 5.0518e-02, PNorm = 110.5806, GNorm = 0.5985, lr_0 = 8.4134e-04
Loss = 5.9965e-02, PNorm = 110.6416, GNorm = 0.3878, lr_0 = 8.4076e-04
Loss = 6.3242e-02, PNorm = 110.7034, GNorm = 0.2861, lr_0 = 8.4019e-04
Loss = 5.4505e-02, PNorm = 110.7799, GNorm = 0.6682, lr_0 = 8.3961e-04
Loss = 5.6926e-02, PNorm = 110.8402, GNorm = 0.3475, lr_0 = 8.3903e-04
Loss = 5.6080e-02, PNorm = 110.9106, GNorm = 0.4308, lr_0 = 8.3846e-04
Loss = 5.9482e-02, PNorm = 110.9750, GNorm = 0.8813, lr_0 = 8.3789e-04
Loss = 5.7743e-02, PNorm = 111.0553, GNorm = 1.2315, lr_0 = 8.3731e-04
Loss = 4.8605e-02, PNorm = 111.1140, GNorm = 0.3815, lr_0 = 8.3674e-04
Loss = 5.2384e-02, PNorm = 111.1833, GNorm = 0.3934, lr_0 = 8.3616e-04
Loss = 5.2448e-02, PNorm = 111.2428, GNorm = 0.5029, lr_0 = 8.3559e-04
Loss = 6.2638e-02, PNorm = 111.3042, GNorm = 0.4073, lr_0 = 8.3502e-04
Loss = 5.7655e-02, PNorm = 111.3781, GNorm = 0.3595, lr_0 = 8.3445e-04
Loss = 5.3030e-02, PNorm = 111.4460, GNorm = 0.7935, lr_0 = 8.3388e-04
Loss = 5.0482e-02, PNorm = 111.5016, GNorm = 0.4068, lr_0 = 8.3330e-04
Loss = 5.0978e-02, PNorm = 111.5717, GNorm = 0.4252, lr_0 = 8.3273e-04
Loss = 6.0979e-02, PNorm = 111.6239, GNorm = 0.7867, lr_0 = 8.3216e-04
Loss = 5.8445e-02, PNorm = 111.6972, GNorm = 0.7277, lr_0 = 8.3159e-04
Loss = 5.6413e-02, PNorm = 111.7660, GNorm = 0.5766, lr_0 = 8.3102e-04
Loss = 5.7530e-02, PNorm = 111.8423, GNorm = 0.7437, lr_0 = 8.3045e-04
Loss = 5.7795e-02, PNorm = 111.9047, GNorm = 0.6724, lr_0 = 8.2988e-04
Loss = 5.7224e-02, PNorm = 111.9723, GNorm = 0.5440, lr_0 = 8.2932e-04
Loss = 6.2428e-02, PNorm = 112.0389, GNorm = 0.3606, lr_0 = 8.2875e-04
Loss = 5.7409e-02, PNorm = 112.1074, GNorm = 0.8336, lr_0 = 8.2818e-04
Loss = 7.1772e-02, PNorm = 112.1790, GNorm = 0.4571, lr_0 = 8.2761e-04
Loss = 5.0663e-02, PNorm = 112.2605, GNorm = 0.6036, lr_0 = 8.2705e-04
Loss = 5.1980e-02, PNorm = 112.3387, GNorm = 0.3610, lr_0 = 8.2648e-04
Loss = 5.3985e-02, PNorm = 112.4040, GNorm = 1.0481, lr_0 = 8.2591e-04
Loss = 5.3901e-02, PNorm = 112.4743, GNorm = 0.5289, lr_0 = 8.2535e-04
Loss = 5.7318e-02, PNorm = 112.5480, GNorm = 0.3043, lr_0 = 8.2478e-04
Loss = 5.5863e-02, PNorm = 112.6213, GNorm = 0.5362, lr_0 = 8.2422e-04
Loss = 5.7358e-02, PNorm = 112.6860, GNorm = 0.4998, lr_0 = 8.2365e-04
Loss = 5.0723e-02, PNorm = 112.7551, GNorm = 0.3509, lr_0 = 8.2309e-04
Loss = 5.7695e-02, PNorm = 112.8262, GNorm = 0.4971, lr_0 = 8.2252e-04
Loss = 5.8346e-02, PNorm = 112.8994, GNorm = 0.6792, lr_0 = 8.2196e-04
Loss = 5.8949e-02, PNorm = 112.9779, GNorm = 0.4492, lr_0 = 8.2140e-04
Loss = 6.0145e-02, PNorm = 113.0514, GNorm = 0.3891, lr_0 = 8.2084e-04
Loss = 5.6920e-02, PNorm = 113.1338, GNorm = 0.4453, lr_0 = 8.2027e-04
Loss = 5.6868e-02, PNorm = 113.2021, GNorm = 0.3414, lr_0 = 8.1971e-04
Loss = 6.0226e-02, PNorm = 113.2835, GNorm = 0.4662, lr_0 = 8.1915e-04
Loss = 5.4504e-02, PNorm = 113.3648, GNorm = 0.2976, lr_0 = 8.1859e-04
Loss = 7.4133e-02, PNorm = 113.4500, GNorm = 0.5873, lr_0 = 8.1803e-04
Loss = 5.6323e-02, PNorm = 113.5335, GNorm = 0.3477, lr_0 = 8.1747e-04
Loss = 5.6912e-02, PNorm = 113.6163, GNorm = 0.4390, lr_0 = 8.1691e-04
Loss = 6.1738e-02, PNorm = 113.6905, GNorm = 0.5334, lr_0 = 8.1635e-04
Loss = 5.3043e-02, PNorm = 113.7658, GNorm = 0.7155, lr_0 = 8.1579e-04
Loss = 6.2894e-02, PNorm = 113.8430, GNorm = 0.5970, lr_0 = 8.1523e-04
Loss = 5.9313e-02, PNorm = 113.9194, GNorm = 0.5978, lr_0 = 8.1467e-04
Loss = 6.0810e-02, PNorm = 114.0020, GNorm = 0.3789, lr_0 = 8.1411e-04
Loss = 6.3975e-02, PNorm = 114.0862, GNorm = 0.5249, lr_0 = 8.1355e-04
Loss = 5.4859e-02, PNorm = 114.1680, GNorm = 0.2843, lr_0 = 8.1300e-04
Loss = 6.1685e-02, PNorm = 114.2452, GNorm = 0.6522, lr_0 = 8.1244e-04
Loss = 6.5125e-02, PNorm = 114.3192, GNorm = 1.4289, lr_0 = 8.1188e-04
Loss = 6.5674e-02, PNorm = 114.4062, GNorm = 0.4626, lr_0 = 8.1133e-04
Loss = 5.5217e-02, PNorm = 114.4868, GNorm = 0.2731, lr_0 = 8.1077e-04
Loss = 6.3490e-02, PNorm = 114.5695, GNorm = 0.5965, lr_0 = 8.1022e-04
Loss = 6.6315e-02, PNorm = 114.6528, GNorm = 0.8768, lr_0 = 8.0966e-04
Loss = 5.7239e-02, PNorm = 114.7423, GNorm = 0.2999, lr_0 = 8.0911e-04
Loss = 5.5125e-02, PNorm = 114.8203, GNorm = 0.4609, lr_0 = 8.0855e-04
Loss = 5.9904e-02, PNorm = 114.9100, GNorm = 0.8364, lr_0 = 8.0800e-04
Loss = 5.9376e-02, PNorm = 114.9925, GNorm = 0.6018, lr_0 = 8.0745e-04
Loss = 5.5578e-02, PNorm = 115.0833, GNorm = 0.7129, lr_0 = 8.0689e-04
Loss = 5.9584e-02, PNorm = 115.1494, GNorm = 0.4616, lr_0 = 8.0634e-04
Loss = 6.2797e-02, PNorm = 115.2315, GNorm = 0.3290, lr_0 = 8.0579e-04
Loss = 6.1902e-02, PNorm = 115.3145, GNorm = 0.5724, lr_0 = 8.0523e-04
Loss = 5.6004e-02, PNorm = 115.3990, GNorm = 0.7303, lr_0 = 8.0468e-04
Loss = 6.4986e-02, PNorm = 115.4798, GNorm = 0.6247, lr_0 = 8.0413e-04
Loss = 5.7168e-02, PNorm = 115.5616, GNorm = 0.3565, lr_0 = 8.0358e-04
Loss = 6.2140e-02, PNorm = 115.6501, GNorm = 0.5081, lr_0 = 8.0303e-04
Loss = 5.4202e-02, PNorm = 115.7341, GNorm = 0.4050, lr_0 = 8.0248e-04
Loss = 5.8269e-02, PNorm = 115.8141, GNorm = 0.3851, lr_0 = 8.0193e-04
Loss = 6.9470e-02, PNorm = 115.9025, GNorm = 0.7828, lr_0 = 8.0138e-04
Loss = 6.0662e-02, PNorm = 115.9873, GNorm = 0.3947, lr_0 = 8.0083e-04
Loss = 6.5355e-02, PNorm = 116.0712, GNorm = 0.5877, lr_0 = 8.0028e-04
Loss = 5.5136e-02, PNorm = 116.1529, GNorm = 0.3344, lr_0 = 7.9974e-04
Loss = 6.0951e-02, PNorm = 116.2388, GNorm = 0.3491, lr_0 = 7.9919e-04
Loss = 6.7423e-02, PNorm = 116.3226, GNorm = 0.5775, lr_0 = 7.9864e-04
Loss = 6.1187e-02, PNorm = 116.4117, GNorm = 0.5879, lr_0 = 7.9809e-04
Loss = 6.4501e-02, PNorm = 116.4973, GNorm = 0.6214, lr_0 = 7.9755e-04
Loss = 5.4442e-02, PNorm = 116.5844, GNorm = 0.3498, lr_0 = 7.9700e-04
Loss = 6.9490e-02, PNorm = 116.6661, GNorm = 0.5058, lr_0 = 7.9645e-04
Loss = 6.3430e-02, PNorm = 116.7601, GNorm = 0.3413, lr_0 = 7.9591e-04
Loss = 5.9783e-02, PNorm = 116.8487, GNorm = 0.5931, lr_0 = 7.9536e-04
Loss = 7.2160e-02, PNorm = 116.9354, GNorm = 0.7300, lr_0 = 7.9482e-04
Loss = 6.5918e-02, PNorm = 117.0276, GNorm = 0.6087, lr_0 = 7.9427e-04
Loss = 6.2524e-02, PNorm = 117.1174, GNorm = 0.7256, lr_0 = 7.9373e-04
Loss = 6.2064e-02, PNorm = 117.2012, GNorm = 0.3614, lr_0 = 7.9319e-04
Loss = 6.1187e-02, PNorm = 117.2925, GNorm = 0.4396, lr_0 = 7.9264e-04
Loss = 6.3765e-02, PNorm = 117.3779, GNorm = 0.3125, lr_0 = 7.9210e-04
Loss = 6.3827e-02, PNorm = 117.4673, GNorm = 0.5112, lr_0 = 7.9156e-04
Loss = 5.8697e-02, PNorm = 117.5551, GNorm = 0.3934, lr_0 = 7.9101e-04
Loss = 6.1195e-02, PNorm = 117.6403, GNorm = 0.3539, lr_0 = 7.9047e-04
Loss = 6.4947e-02, PNorm = 117.7268, GNorm = 0.3146, lr_0 = 7.8993e-04
Loss = 6.2968e-02, PNorm = 117.8083, GNorm = 0.4087, lr_0 = 7.8939e-04
Loss = 5.8211e-02, PNorm = 117.8927, GNorm = 0.3790, lr_0 = 7.8885e-04
Loss = 6.3605e-02, PNorm = 117.9793, GNorm = 0.4480, lr_0 = 7.8831e-04
Loss = 6.7261e-02, PNorm = 118.0630, GNorm = 0.4507, lr_0 = 7.8777e-04
Loss = 6.7550e-02, PNorm = 118.1631, GNorm = 0.3938, lr_0 = 7.8723e-04
Loss = 6.2017e-02, PNorm = 118.2552, GNorm = 0.3218, lr_0 = 7.8669e-04
Loss = 5.7154e-02, PNorm = 118.3563, GNorm = 0.5939, lr_0 = 7.8615e-04
Loss = 7.3636e-02, PNorm = 118.4613, GNorm = 0.7031, lr_0 = 7.8561e-04
Loss = 6.9109e-02, PNorm = 118.5692, GNorm = 0.3342, lr_0 = 7.8507e-04
Loss = 5.9938e-02, PNorm = 118.6518, GNorm = 0.6151, lr_0 = 7.8454e-04
Loss = 6.0448e-02, PNorm = 118.7387, GNorm = 0.8141, lr_0 = 7.8400e-04
Loss = 5.6778e-02, PNorm = 118.8257, GNorm = 0.4565, lr_0 = 7.8346e-04
Loss = 6.7926e-02, PNorm = 118.9186, GNorm = 0.3581, lr_0 = 7.8293e-04
Loss = 7.2534e-02, PNorm = 119.0126, GNorm = 1.1068, lr_0 = 7.8239e-04
Loss = 6.2396e-02, PNorm = 119.1115, GNorm = 0.6193, lr_0 = 7.8185e-04
Loss = 7.4394e-02, PNorm = 119.2059, GNorm = 0.5318, lr_0 = 7.8132e-04
Validation mae = 0.291186
Epoch 5
Loss = 5.1783e-02, PNorm = 119.2955, GNorm = 0.4289, lr_0 = 7.8078e-04
Loss = 4.7520e-02, PNorm = 119.3583, GNorm = 0.5984, lr_0 = 7.8025e-04
Loss = 4.6576e-02, PNorm = 119.4210, GNorm = 0.2843, lr_0 = 7.7971e-04
Loss = 4.6451e-02, PNorm = 119.4803, GNorm = 0.3903, lr_0 = 7.7918e-04
Loss = 4.4707e-02, PNorm = 119.5312, GNorm = 0.4597, lr_0 = 7.7864e-04
Loss = 4.6809e-02, PNorm = 119.5928, GNorm = 0.7050, lr_0 = 7.7811e-04
Loss = 4.2078e-02, PNorm = 119.6518, GNorm = 0.2112, lr_0 = 7.7758e-04
Loss = 4.5307e-02, PNorm = 119.7136, GNorm = 0.5547, lr_0 = 7.7705e-04
Loss = 4.3092e-02, PNorm = 119.7712, GNorm = 0.6715, lr_0 = 7.7651e-04
Loss = 3.9656e-02, PNorm = 119.8205, GNorm = 0.3840, lr_0 = 7.7598e-04
Loss = 3.7379e-02, PNorm = 119.8813, GNorm = 0.3587, lr_0 = 7.7545e-04
Loss = 4.4056e-02, PNorm = 119.9381, GNorm = 0.1784, lr_0 = 7.7492e-04
Loss = 3.5178e-02, PNorm = 119.9897, GNorm = 0.4083, lr_0 = 7.7439e-04
Loss = 4.8283e-02, PNorm = 120.0509, GNorm = 0.8529, lr_0 = 7.7386e-04
Loss = 4.5962e-02, PNorm = 120.1112, GNorm = 0.2739, lr_0 = 7.7333e-04
Loss = 3.8583e-02, PNorm = 120.1674, GNorm = 0.4783, lr_0 = 7.7280e-04
Loss = 3.8271e-02, PNorm = 120.2214, GNorm = 0.3996, lr_0 = 7.7227e-04
Loss = 3.7476e-02, PNorm = 120.2764, GNorm = 0.4115, lr_0 = 7.7174e-04
Loss = 3.7733e-02, PNorm = 120.3330, GNorm = 0.3847, lr_0 = 7.7121e-04
Loss = 4.3325e-02, PNorm = 120.3978, GNorm = 0.6607, lr_0 = 7.7068e-04
Loss = 3.9232e-02, PNorm = 120.4582, GNorm = 0.6473, lr_0 = 7.7015e-04
Loss = 4.1745e-02, PNorm = 120.5203, GNorm = 0.2231, lr_0 = 7.6963e-04
Loss = 4.2031e-02, PNorm = 120.5765, GNorm = 0.6147, lr_0 = 7.6910e-04
Loss = 4.6234e-02, PNorm = 120.6333, GNorm = 0.3826, lr_0 = 7.6857e-04
Loss = 4.1022e-02, PNorm = 120.6923, GNorm = 0.3793, lr_0 = 7.6805e-04
Loss = 4.9370e-02, PNorm = 120.7577, GNorm = 0.3449, lr_0 = 7.6752e-04
Loss = 4.0216e-02, PNorm = 120.8225, GNorm = 0.4059, lr_0 = 7.6699e-04
Loss = 5.2667e-02, PNorm = 120.8786, GNorm = 0.3624, lr_0 = 7.6647e-04
Loss = 4.7759e-02, PNorm = 120.9456, GNorm = 0.5100, lr_0 = 7.6594e-04
Loss = 4.4102e-02, PNorm = 121.0049, GNorm = 0.4627, lr_0 = 7.6542e-04
Loss = 4.0501e-02, PNorm = 121.0778, GNorm = 0.2515, lr_0 = 7.6489e-04
Loss = 4.2353e-02, PNorm = 121.1406, GNorm = 1.0289, lr_0 = 7.6437e-04
Loss = 4.3524e-02, PNorm = 121.2083, GNorm = 0.2577, lr_0 = 7.6385e-04
Loss = 3.8075e-02, PNorm = 121.2702, GNorm = 0.6049, lr_0 = 7.6332e-04
Loss = 3.3208e-02, PNorm = 121.3368, GNorm = 0.6915, lr_0 = 7.6280e-04
Loss = 4.4675e-02, PNorm = 121.3929, GNorm = 0.9176, lr_0 = 7.6228e-04
Loss = 4.2728e-02, PNorm = 121.4565, GNorm = 0.4716, lr_0 = 7.6176e-04
Loss = 4.5522e-02, PNorm = 121.5316, GNorm = 0.4056, lr_0 = 7.6123e-04
Loss = 4.0571e-02, PNorm = 121.5946, GNorm = 0.3786, lr_0 = 7.6071e-04
Loss = 3.8700e-02, PNorm = 121.6635, GNorm = 0.3828, lr_0 = 7.6019e-04
Loss = 3.4366e-02, PNorm = 121.7212, GNorm = 0.2664, lr_0 = 7.5967e-04
Loss = 4.5343e-02, PNorm = 121.7853, GNorm = 0.6015, lr_0 = 7.5915e-04
Loss = 4.1494e-02, PNorm = 121.8444, GNorm = 0.5703, lr_0 = 7.5863e-04
Loss = 4.8594e-02, PNorm = 121.9141, GNorm = 0.2792, lr_0 = 7.5811e-04
Loss = 3.5832e-02, PNorm = 121.9846, GNorm = 0.6076, lr_0 = 7.5759e-04
Loss = 4.3120e-02, PNorm = 122.0507, GNorm = 0.4076, lr_0 = 7.5707e-04
Loss = 4.2145e-02, PNorm = 122.1205, GNorm = 0.6701, lr_0 = 7.5655e-04
Loss = 3.6578e-02, PNorm = 122.2004, GNorm = 0.3833, lr_0 = 7.5603e-04
Loss = 4.3642e-02, PNorm = 122.2673, GNorm = 0.4712, lr_0 = 7.5552e-04
Loss = 4.3221e-02, PNorm = 122.3332, GNorm = 0.4926, lr_0 = 7.5500e-04
Loss = 4.6768e-02, PNorm = 122.4023, GNorm = 0.3467, lr_0 = 7.5448e-04
Loss = 4.0054e-02, PNorm = 122.4662, GNorm = 0.7051, lr_0 = 7.5397e-04
Loss = 4.0434e-02, PNorm = 122.5391, GNorm = 0.6291, lr_0 = 7.5345e-04
Loss = 4.3771e-02, PNorm = 122.6124, GNorm = 0.2676, lr_0 = 7.5293e-04
Loss = 4.1338e-02, PNorm = 122.6872, GNorm = 0.3142, lr_0 = 7.5242e-04
Loss = 4.4584e-02, PNorm = 122.7493, GNorm = 0.5470, lr_0 = 7.5190e-04
Loss = 3.7383e-02, PNorm = 122.8181, GNorm = 0.4025, lr_0 = 7.5139e-04
Loss = 3.6373e-02, PNorm = 122.8838, GNorm = 0.2771, lr_0 = 7.5087e-04
Loss = 4.2735e-02, PNorm = 122.9420, GNorm = 0.7121, lr_0 = 7.5036e-04
Loss = 4.4283e-02, PNorm = 123.0102, GNorm = 0.6782, lr_0 = 7.4984e-04
Loss = 4.9756e-02, PNorm = 123.0683, GNorm = 0.8337, lr_0 = 7.4933e-04
Loss = 4.6527e-02, PNorm = 123.1414, GNorm = 0.4179, lr_0 = 7.4882e-04
Loss = 4.1436e-02, PNorm = 123.2090, GNorm = 0.3596, lr_0 = 7.4830e-04
Loss = 4.3752e-02, PNorm = 123.2882, GNorm = 0.6087, lr_0 = 7.4779e-04
Loss = 4.7140e-02, PNorm = 123.3622, GNorm = 0.6984, lr_0 = 7.4728e-04
Loss = 4.1382e-02, PNorm = 123.4403, GNorm = 0.6248, lr_0 = 7.4677e-04
Loss = 4.3517e-02, PNorm = 123.5110, GNorm = 0.6530, lr_0 = 7.4625e-04
Loss = 3.6265e-02, PNorm = 123.5814, GNorm = 0.2773, lr_0 = 7.4574e-04
Loss = 3.9754e-02, PNorm = 123.6485, GNorm = 0.5306, lr_0 = 7.4523e-04
Loss = 4.5746e-02, PNorm = 123.7185, GNorm = 0.5173, lr_0 = 7.4472e-04
Loss = 4.5929e-02, PNorm = 123.7922, GNorm = 0.3914, lr_0 = 7.4421e-04
Loss = 4.3758e-02, PNorm = 123.8626, GNorm = 0.3094, lr_0 = 7.4370e-04
Loss = 3.9076e-02, PNorm = 123.9336, GNorm = 0.7934, lr_0 = 7.4319e-04
Loss = 3.8781e-02, PNorm = 124.0041, GNorm = 0.2204, lr_0 = 7.4268e-04
Loss = 4.7667e-02, PNorm = 124.0711, GNorm = 0.2951, lr_0 = 7.4217e-04
Loss = 3.7774e-02, PNorm = 124.1410, GNorm = 0.4133, lr_0 = 7.4167e-04
Loss = 4.1756e-02, PNorm = 124.2161, GNorm = 0.9426, lr_0 = 7.4116e-04
Loss = 4.0717e-02, PNorm = 124.2888, GNorm = 0.5948, lr_0 = 7.4065e-04
Loss = 4.0059e-02, PNorm = 124.3634, GNorm = 0.2313, lr_0 = 7.4014e-04
Loss = 3.8296e-02, PNorm = 124.4267, GNorm = 0.3294, lr_0 = 7.3964e-04
Loss = 4.6834e-02, PNorm = 124.4910, GNorm = 0.4607, lr_0 = 7.3913e-04
Loss = 5.1393e-02, PNorm = 124.5643, GNorm = 0.6790, lr_0 = 7.3862e-04
Loss = 4.1589e-02, PNorm = 124.6331, GNorm = 0.5149, lr_0 = 7.3812e-04
Loss = 4.8632e-02, PNorm = 124.7160, GNorm = 0.2953, lr_0 = 7.3761e-04
Loss = 4.9736e-02, PNorm = 124.7926, GNorm = 0.7550, lr_0 = 7.3711e-04
Loss = 4.6928e-02, PNorm = 124.8689, GNorm = 0.5994, lr_0 = 7.3660e-04
Loss = 4.8070e-02, PNorm = 124.9447, GNorm = 0.3155, lr_0 = 7.3610e-04
Loss = 4.3969e-02, PNorm = 125.0205, GNorm = 0.5843, lr_0 = 7.3559e-04
Loss = 4.9216e-02, PNorm = 125.0975, GNorm = 0.4933, lr_0 = 7.3509e-04
Loss = 5.0958e-02, PNorm = 125.1782, GNorm = 0.8146, lr_0 = 7.3458e-04
Loss = 5.2429e-02, PNorm = 125.2596, GNorm = 0.3599, lr_0 = 7.3408e-04
Loss = 4.3445e-02, PNorm = 125.3483, GNorm = 0.5878, lr_0 = 7.3358e-04
Loss = 4.5410e-02, PNorm = 125.4315, GNorm = 0.3343, lr_0 = 7.3308e-04
Loss = 4.3782e-02, PNorm = 125.5115, GNorm = 0.3705, lr_0 = 7.3257e-04
Loss = 4.7489e-02, PNorm = 125.5855, GNorm = 0.4145, lr_0 = 7.3207e-04
Loss = 5.0127e-02, PNorm = 125.6639, GNorm = 0.4577, lr_0 = 7.3157e-04
Loss = 4.6374e-02, PNorm = 125.7476, GNorm = 0.4152, lr_0 = 7.3107e-04
Loss = 4.9425e-02, PNorm = 125.8313, GNorm = 0.6496, lr_0 = 7.3057e-04
Loss = 4.4376e-02, PNorm = 125.9193, GNorm = 0.4837, lr_0 = 7.3007e-04
Loss = 4.6335e-02, PNorm = 125.9953, GNorm = 0.4362, lr_0 = 7.2957e-04
Loss = 4.4460e-02, PNorm = 126.0670, GNorm = 0.5765, lr_0 = 7.2907e-04
Loss = 4.9345e-02, PNorm = 126.1412, GNorm = 0.3337, lr_0 = 7.2857e-04
Loss = 4.3519e-02, PNorm = 126.2198, GNorm = 0.5625, lr_0 = 7.2807e-04
Loss = 4.5166e-02, PNorm = 126.2970, GNorm = 0.5899, lr_0 = 7.2757e-04
Loss = 4.5674e-02, PNorm = 126.3769, GNorm = 0.6804, lr_0 = 7.2707e-04
Loss = 4.5818e-02, PNorm = 126.4449, GNorm = 0.3062, lr_0 = 7.2657e-04
Loss = 5.2905e-02, PNorm = 126.5243, GNorm = 0.4118, lr_0 = 7.2608e-04
Loss = 5.0642e-02, PNorm = 126.6046, GNorm = 0.8087, lr_0 = 7.2558e-04
Loss = 5.1622e-02, PNorm = 126.6893, GNorm = 0.2865, lr_0 = 7.2508e-04
Loss = 4.5868e-02, PNorm = 126.7770, GNorm = 0.6120, lr_0 = 7.2458e-04
Loss = 5.1045e-02, PNorm = 126.8566, GNorm = 0.9879, lr_0 = 7.2409e-04
Loss = 5.2708e-02, PNorm = 126.9419, GNorm = 0.3183, lr_0 = 7.2359e-04
Loss = 4.8465e-02, PNorm = 127.0301, GNorm = 0.3507, lr_0 = 7.2310e-04
Loss = 4.4160e-02, PNorm = 127.1054, GNorm = 0.3729, lr_0 = 7.2260e-04
Loss = 5.1807e-02, PNorm = 127.1883, GNorm = 0.3781, lr_0 = 7.2211e-04
Loss = 4.9543e-02, PNorm = 127.2694, GNorm = 0.5504, lr_0 = 7.2161e-04
Loss = 5.3806e-02, PNorm = 127.3596, GNorm = 0.2898, lr_0 = 7.2112e-04
Loss = 5.0773e-02, PNorm = 127.4449, GNorm = 0.3132, lr_0 = 7.2062e-04
Loss = 4.7841e-02, PNorm = 127.5299, GNorm = 0.6050, lr_0 = 7.2013e-04
Loss = 4.8400e-02, PNorm = 127.6180, GNorm = 0.5467, lr_0 = 7.1964e-04
Validation mae = 0.288327
Epoch 6
Loss = 3.9279e-02, PNorm = 127.6858, GNorm = 0.6474, lr_0 = 7.1914e-04
Loss = 3.6884e-02, PNorm = 127.7505, GNorm = 0.2468, lr_0 = 7.1865e-04
Loss = 4.1339e-02, PNorm = 127.7936, GNorm = 0.4772, lr_0 = 7.1816e-04
Loss = 3.2045e-02, PNorm = 127.8477, GNorm = 0.2754, lr_0 = 7.1767e-04
Loss = 3.7326e-02, PNorm = 127.8974, GNorm = 0.6308, lr_0 = 7.1717e-04
Loss = 3.3908e-02, PNorm = 127.9520, GNorm = 0.2599, lr_0 = 7.1668e-04
Loss = 3.2579e-02, PNorm = 128.0098, GNorm = 0.2183, lr_0 = 7.1619e-04
Loss = 3.2207e-02, PNorm = 128.0617, GNorm = 0.6208, lr_0 = 7.1570e-04
Loss = 2.9454e-02, PNorm = 128.1084, GNorm = 0.3347, lr_0 = 7.1521e-04
Loss = 3.5873e-02, PNorm = 128.1519, GNorm = 1.1344, lr_0 = 7.1472e-04
Loss = 3.4047e-02, PNorm = 128.2027, GNorm = 0.3082, lr_0 = 7.1423e-04
Loss = 2.8862e-02, PNorm = 128.2503, GNorm = 0.6712, lr_0 = 7.1374e-04
Loss = 3.5988e-02, PNorm = 128.3010, GNorm = 0.3703, lr_0 = 7.1325e-04
Loss = 3.2092e-02, PNorm = 128.3503, GNorm = 0.3023, lr_0 = 7.1277e-04
Loss = 2.8988e-02, PNorm = 128.4012, GNorm = 0.7710, lr_0 = 7.1228e-04
Loss = 3.4272e-02, PNorm = 128.4495, GNorm = 0.4077, lr_0 = 7.1179e-04
Loss = 2.9610e-02, PNorm = 128.5002, GNorm = 0.2867, lr_0 = 7.1130e-04
Loss = 3.0879e-02, PNorm = 128.5550, GNorm = 0.5344, lr_0 = 7.1081e-04
Loss = 3.2378e-02, PNorm = 128.6031, GNorm = 0.2135, lr_0 = 7.1033e-04
Loss = 4.1084e-02, PNorm = 128.6488, GNorm = 0.4363, lr_0 = 7.0984e-04
Loss = 3.4366e-02, PNorm = 128.7083, GNorm = 0.4022, lr_0 = 7.0935e-04
Loss = 3.1992e-02, PNorm = 128.7619, GNorm = 0.5807, lr_0 = 7.0887e-04
Loss = 3.0188e-02, PNorm = 128.8194, GNorm = 0.4371, lr_0 = 7.0838e-04
Loss = 3.2286e-02, PNorm = 128.8808, GNorm = 0.2524, lr_0 = 7.0790e-04
Loss = 3.4024e-02, PNorm = 128.9358, GNorm = 0.2993, lr_0 = 7.0741e-04
Loss = 3.2987e-02, PNorm = 128.9914, GNorm = 0.3684, lr_0 = 7.0693e-04
Loss = 3.5086e-02, PNorm = 129.0455, GNorm = 0.2648, lr_0 = 7.0644e-04
Loss = 3.1747e-02, PNorm = 129.0952, GNorm = 0.3946, lr_0 = 7.0596e-04
Loss = 3.2027e-02, PNorm = 129.1495, GNorm = 0.8166, lr_0 = 7.0548e-04
Loss = 3.1703e-02, PNorm = 129.2053, GNorm = 0.6601, lr_0 = 7.0499e-04
Loss = 2.9190e-02, PNorm = 129.2602, GNorm = 0.4886, lr_0 = 7.0451e-04
Loss = 3.1384e-02, PNorm = 129.3102, GNorm = 0.4708, lr_0 = 7.0403e-04
Loss = 3.2773e-02, PNorm = 129.3555, GNorm = 0.3510, lr_0 = 7.0354e-04
Loss = 4.1389e-02, PNorm = 129.4067, GNorm = 0.3068, lr_0 = 7.0306e-04
Loss = 3.6182e-02, PNorm = 129.4685, GNorm = 0.7404, lr_0 = 7.0258e-04
Loss = 2.9600e-02, PNorm = 129.5276, GNorm = 0.4514, lr_0 = 7.0210e-04
Loss = 3.1898e-02, PNorm = 129.5826, GNorm = 0.4160, lr_0 = 7.0162e-04
Loss = 3.1973e-02, PNorm = 129.6425, GNorm = 0.8692, lr_0 = 7.0114e-04
Loss = 3.0299e-02, PNorm = 129.6952, GNorm = 0.2610, lr_0 = 7.0066e-04
Loss = 2.8716e-02, PNorm = 129.7520, GNorm = 0.3983, lr_0 = 7.0018e-04
Loss = 3.3573e-02, PNorm = 129.8095, GNorm = 0.5859, lr_0 = 6.9970e-04
Loss = 3.1392e-02, PNorm = 129.8657, GNorm = 0.5464, lr_0 = 6.9922e-04
Loss = 3.7497e-02, PNorm = 129.9211, GNorm = 0.6302, lr_0 = 6.9874e-04
Loss = 2.9608e-02, PNorm = 129.9832, GNorm = 0.5807, lr_0 = 6.9826e-04
Loss = 2.9989e-02, PNorm = 130.0348, GNorm = 0.5237, lr_0 = 6.9778e-04
Loss = 3.3380e-02, PNorm = 130.0973, GNorm = 0.2954, lr_0 = 6.9730e-04
Loss = 3.5639e-02, PNorm = 130.1609, GNorm = 0.4238, lr_0 = 6.9683e-04
Loss = 3.0979e-02, PNorm = 130.2201, GNorm = 0.3537, lr_0 = 6.9635e-04
Loss = 3.3097e-02, PNorm = 130.2808, GNorm = 0.7542, lr_0 = 6.9587e-04
Loss = 3.2626e-02, PNorm = 130.3374, GNorm = 0.4206, lr_0 = 6.9540e-04
Loss = 3.2293e-02, PNorm = 130.4011, GNorm = 0.4592, lr_0 = 6.9492e-04
Loss = 3.4311e-02, PNorm = 130.4624, GNorm = 0.1868, lr_0 = 6.9444e-04
Loss = 3.3632e-02, PNorm = 130.5159, GNorm = 0.4991, lr_0 = 6.9397e-04
Loss = 3.4058e-02, PNorm = 130.5865, GNorm = 0.7883, lr_0 = 6.9349e-04
Loss = 3.3288e-02, PNorm = 130.6486, GNorm = 0.4168, lr_0 = 6.9302e-04
Loss = 3.3503e-02, PNorm = 130.7194, GNorm = 0.2643, lr_0 = 6.9254e-04
Loss = 3.4150e-02, PNorm = 130.7864, GNorm = 0.3126, lr_0 = 6.9207e-04
Loss = 2.9396e-02, PNorm = 130.8517, GNorm = 0.5628, lr_0 = 6.9159e-04
Loss = 3.2587e-02, PNorm = 130.9107, GNorm = 0.4285, lr_0 = 6.9112e-04
Loss = 3.3683e-02, PNorm = 130.9736, GNorm = 0.4665, lr_0 = 6.9065e-04
Loss = 3.3958e-02, PNorm = 131.0321, GNorm = 0.4407, lr_0 = 6.9017e-04
Loss = 3.4741e-02, PNorm = 131.0883, GNorm = 0.9264, lr_0 = 6.8970e-04
Loss = 3.4796e-02, PNorm = 131.1470, GNorm = 0.4232, lr_0 = 6.8923e-04
Loss = 2.9855e-02, PNorm = 131.2024, GNorm = 0.2927, lr_0 = 6.8876e-04
Loss = 3.6364e-02, PNorm = 131.2673, GNorm = 0.2792, lr_0 = 6.8828e-04
Loss = 3.6179e-02, PNorm = 131.3347, GNorm = 0.3175, lr_0 = 6.8781e-04
Loss = 4.0188e-02, PNorm = 131.4008, GNorm = 0.4813, lr_0 = 6.8734e-04
Loss = 3.5995e-02, PNorm = 131.4753, GNorm = 0.2319, lr_0 = 6.8687e-04
Loss = 3.6128e-02, PNorm = 131.5511, GNorm = 0.6049, lr_0 = 6.8640e-04
Loss = 3.1892e-02, PNorm = 131.6091, GNorm = 0.5648, lr_0 = 6.8593e-04
Loss = 3.3034e-02, PNorm = 131.6733, GNorm = 0.5635, lr_0 = 6.8546e-04
Loss = 3.4827e-02, PNorm = 131.7318, GNorm = 0.7511, lr_0 = 6.8499e-04
Loss = 2.8550e-02, PNorm = 131.7948, GNorm = 0.2011, lr_0 = 6.8452e-04
Loss = 3.3603e-02, PNorm = 131.8521, GNorm = 0.4106, lr_0 = 6.8405e-04
Loss = 3.3450e-02, PNorm = 131.9124, GNorm = 0.2738, lr_0 = 6.8358e-04
Loss = 3.0504e-02, PNorm = 131.9715, GNorm = 0.4931, lr_0 = 6.8312e-04
Loss = 2.9998e-02, PNorm = 132.0306, GNorm = 0.2882, lr_0 = 6.8265e-04
Loss = 3.3257e-02, PNorm = 132.0926, GNorm = 0.3431, lr_0 = 6.8218e-04
Loss = 3.8474e-02, PNorm = 132.1524, GNorm = 0.3604, lr_0 = 6.8171e-04
Loss = 3.4217e-02, PNorm = 132.2172, GNorm = 0.3518, lr_0 = 6.8125e-04
Loss = 3.1430e-02, PNorm = 132.2747, GNorm = 0.3179, lr_0 = 6.8078e-04
Loss = 3.1935e-02, PNorm = 132.3361, GNorm = 0.2033, lr_0 = 6.8031e-04
Loss = 3.8571e-02, PNorm = 132.3980, GNorm = 0.5047, lr_0 = 6.7985e-04
Loss = 3.4775e-02, PNorm = 132.4646, GNorm = 0.3133, lr_0 = 6.7938e-04
Loss = 3.3707e-02, PNorm = 132.5267, GNorm = 0.6837, lr_0 = 6.7892e-04
Loss = 3.6116e-02, PNorm = 132.5858, GNorm = 0.2560, lr_0 = 6.7845e-04
Loss = 3.8538e-02, PNorm = 132.6532, GNorm = 0.9018, lr_0 = 6.7799e-04
Loss = 3.7100e-02, PNorm = 132.7234, GNorm = 0.6477, lr_0 = 6.7752e-04
Loss = 3.5595e-02, PNorm = 132.7977, GNorm = 0.3318, lr_0 = 6.7706e-04
Loss = 3.1598e-02, PNorm = 132.8699, GNorm = 0.2600, lr_0 = 6.7659e-04
Loss = 3.6144e-02, PNorm = 132.9346, GNorm = 0.3378, lr_0 = 6.7613e-04
Loss = 3.1861e-02, PNorm = 133.0045, GNorm = 0.2358, lr_0 = 6.7567e-04
Loss = 3.6095e-02, PNorm = 133.0746, GNorm = 0.5625, lr_0 = 6.7520e-04
Loss = 3.3027e-02, PNorm = 133.1415, GNorm = 0.2621, lr_0 = 6.7474e-04
Loss = 3.1095e-02, PNorm = 133.2017, GNorm = 0.6775, lr_0 = 6.7428e-04
Loss = 3.6893e-02, PNorm = 133.2650, GNorm = 0.6332, lr_0 = 6.7382e-04
Loss = 3.3139e-02, PNorm = 133.3245, GNorm = 0.2452, lr_0 = 6.7335e-04
Loss = 3.4184e-02, PNorm = 133.3852, GNorm = 0.6763, lr_0 = 6.7289e-04
Loss = 3.5437e-02, PNorm = 133.4409, GNorm = 0.6389, lr_0 = 6.7243e-04
Loss = 3.4221e-02, PNorm = 133.5072, GNorm = 0.7705, lr_0 = 6.7197e-04
Loss = 3.8127e-02, PNorm = 133.5803, GNorm = 0.2682, lr_0 = 6.7151e-04
Loss = 3.7627e-02, PNorm = 133.6544, GNorm = 0.2694, lr_0 = 6.7105e-04
Loss = 3.2312e-02, PNorm = 133.7365, GNorm = 0.4762, lr_0 = 6.7059e-04
Loss = 3.8482e-02, PNorm = 133.7988, GNorm = 0.4842, lr_0 = 6.7013e-04
Loss = 3.7060e-02, PNorm = 133.8686, GNorm = 0.3120, lr_0 = 6.6967e-04
Loss = 3.3286e-02, PNorm = 133.9311, GNorm = 0.4431, lr_0 = 6.6921e-04
Loss = 4.2697e-02, PNorm = 133.9945, GNorm = 0.6125, lr_0 = 6.6876e-04
Loss = 3.5598e-02, PNorm = 134.0613, GNorm = 0.3686, lr_0 = 6.6830e-04
Loss = 3.7914e-02, PNorm = 134.1279, GNorm = 0.3158, lr_0 = 6.6784e-04
Loss = 3.0179e-02, PNorm = 134.2011, GNorm = 0.5281, lr_0 = 6.6738e-04
Loss = 3.3757e-02, PNorm = 134.2638, GNorm = 0.9986, lr_0 = 6.6693e-04
Loss = 3.0824e-02, PNorm = 134.3338, GNorm = 0.4704, lr_0 = 6.6647e-04
Loss = 2.8618e-02, PNorm = 134.3929, GNorm = 0.5720, lr_0 = 6.6601e-04
Loss = 3.7065e-02, PNorm = 134.4630, GNorm = 0.4614, lr_0 = 6.6556e-04
Loss = 3.9136e-02, PNorm = 134.5326, GNorm = 0.8774, lr_0 = 6.6510e-04
Loss = 3.0460e-02, PNorm = 134.6025, GNorm = 0.4439, lr_0 = 6.6464e-04
Loss = 4.3247e-02, PNorm = 134.6730, GNorm = 0.2341, lr_0 = 6.6419e-04
Loss = 3.1209e-02, PNorm = 134.7497, GNorm = 0.2418, lr_0 = 6.6373e-04
Loss = 3.7758e-02, PNorm = 134.8219, GNorm = 0.4552, lr_0 = 6.6328e-04
Loss = 3.6679e-02, PNorm = 134.8945, GNorm = 0.3002, lr_0 = 6.6282e-04
Validation mae = 0.286146
Epoch 7
Loss = 2.9460e-02, PNorm = 134.9573, GNorm = 0.4855, lr_0 = 6.6237e-04
Loss = 2.5414e-02, PNorm = 135.0104, GNorm = 0.3047, lr_0 = 6.6192e-04
Loss = 2.9583e-02, PNorm = 135.0519, GNorm = 0.6581, lr_0 = 6.6146e-04
Loss = 2.9371e-02, PNorm = 135.0976, GNorm = 0.1731, lr_0 = 6.6101e-04
Loss = 2.8183e-02, PNorm = 135.1413, GNorm = 0.4902, lr_0 = 6.6056e-04
Loss = 2.8577e-02, PNorm = 135.1848, GNorm = 0.1749, lr_0 = 6.6011e-04
Loss = 2.7058e-02, PNorm = 135.2255, GNorm = 0.2148, lr_0 = 6.5965e-04
Loss = 2.6859e-02, PNorm = 135.2716, GNorm = 0.3449, lr_0 = 6.5920e-04
Loss = 2.7141e-02, PNorm = 135.3168, GNorm = 0.2731, lr_0 = 6.5875e-04
Loss = 2.3643e-02, PNorm = 135.3559, GNorm = 0.2336, lr_0 = 6.5830e-04
Loss = 2.6624e-02, PNorm = 135.3968, GNorm = 0.1887, lr_0 = 6.5785e-04
Loss = 2.7062e-02, PNorm = 135.4388, GNorm = 0.5778, lr_0 = 6.5740e-04
Loss = 2.4664e-02, PNorm = 135.4839, GNorm = 0.3638, lr_0 = 6.5695e-04
Loss = 3.2507e-02, PNorm = 135.5299, GNorm = 0.6078, lr_0 = 6.5650e-04
Loss = 3.0570e-02, PNorm = 135.5740, GNorm = 0.2405, lr_0 = 6.5605e-04
Loss = 2.6489e-02, PNorm = 135.6158, GNorm = 0.3586, lr_0 = 6.5560e-04
Loss = 2.6417e-02, PNorm = 135.6614, GNorm = 0.3039, lr_0 = 6.5515e-04
Loss = 2.5325e-02, PNorm = 135.7039, GNorm = 0.2514, lr_0 = 6.5470e-04
Loss = 2.4303e-02, PNorm = 135.7480, GNorm = 0.5737, lr_0 = 6.5425e-04
Loss = 2.9735e-02, PNorm = 135.7960, GNorm = 0.3836, lr_0 = 6.5380e-04
Loss = 2.4501e-02, PNorm = 135.8416, GNorm = 0.4106, lr_0 = 6.5335e-04
Loss = 2.5394e-02, PNorm = 135.8819, GNorm = 0.4358, lr_0 = 6.5291e-04
Loss = 2.9534e-02, PNorm = 135.9300, GNorm = 0.4985, lr_0 = 6.5246e-04
Loss = 2.4929e-02, PNorm = 135.9764, GNorm = 0.2737, lr_0 = 6.5201e-04
Loss = 2.5831e-02, PNorm = 136.0178, GNorm = 0.3223, lr_0 = 6.5157e-04
Loss = 3.0164e-02, PNorm = 136.0683, GNorm = 0.3143, lr_0 = 6.5112e-04
Loss = 2.9462e-02, PNorm = 136.1193, GNorm = 0.6689, lr_0 = 6.5067e-04
Loss = 2.6205e-02, PNorm = 136.1641, GNorm = 0.3380, lr_0 = 6.5023e-04
Loss = 2.3624e-02, PNorm = 136.2148, GNorm = 0.2870, lr_0 = 6.4978e-04
Loss = 2.1945e-02, PNorm = 136.2621, GNorm = 0.4846, lr_0 = 6.4934e-04
Loss = 2.4579e-02, PNorm = 136.3087, GNorm = 0.3890, lr_0 = 6.4889e-04
Loss = 2.2671e-02, PNorm = 136.3532, GNorm = 0.2799, lr_0 = 6.4845e-04
Loss = 2.7718e-02, PNorm = 136.3897, GNorm = 0.2754, lr_0 = 6.4800e-04
Loss = 2.8916e-02, PNorm = 136.4422, GNorm = 0.5930, lr_0 = 6.4756e-04
Loss = 2.5969e-02, PNorm = 136.4936, GNorm = 0.4611, lr_0 = 6.4712e-04
Loss = 2.7511e-02, PNorm = 136.5523, GNorm = 0.4771, lr_0 = 6.4667e-04
Loss = 2.7926e-02, PNorm = 136.6014, GNorm = 0.9336, lr_0 = 6.4623e-04
Loss = 3.4876e-02, PNorm = 136.6592, GNorm = 0.8393, lr_0 = 6.4579e-04
Loss = 2.7167e-02, PNorm = 136.7067, GNorm = 0.3159, lr_0 = 6.4534e-04
Loss = 2.3885e-02, PNorm = 136.7597, GNorm = 0.2327, lr_0 = 6.4490e-04
Loss = 2.3713e-02, PNorm = 136.8098, GNorm = 0.2716, lr_0 = 6.4446e-04
Loss = 2.6103e-02, PNorm = 136.8555, GNorm = 0.1791, lr_0 = 6.4402e-04
Loss = 2.2704e-02, PNorm = 136.8973, GNorm = 0.3620, lr_0 = 6.4358e-04
Loss = 2.3995e-02, PNorm = 136.9459, GNorm = 0.5311, lr_0 = 6.4314e-04
Loss = 2.5654e-02, PNorm = 136.9891, GNorm = 0.1744, lr_0 = 6.4270e-04
Loss = 2.8575e-02, PNorm = 137.0323, GNorm = 0.6593, lr_0 = 6.4226e-04
Loss = 3.1149e-02, PNorm = 137.0857, GNorm = 0.5918, lr_0 = 6.4182e-04
Loss = 2.7659e-02, PNorm = 137.1444, GNorm = 0.7235, lr_0 = 6.4138e-04
Loss = 2.5208e-02, PNorm = 137.2043, GNorm = 0.2074, lr_0 = 6.4094e-04
Loss = 2.7205e-02, PNorm = 137.2559, GNorm = 0.2383, lr_0 = 6.4050e-04
Loss = 2.6452e-02, PNorm = 137.3071, GNorm = 0.3476, lr_0 = 6.4006e-04
Loss = 2.9318e-02, PNorm = 137.3531, GNorm = 0.7062, lr_0 = 6.3962e-04
Loss = 2.5033e-02, PNorm = 137.3997, GNorm = 0.7877, lr_0 = 6.3918e-04
Loss = 2.7687e-02, PNorm = 137.4507, GNorm = 0.3847, lr_0 = 6.3874e-04
Loss = 2.9729e-02, PNorm = 137.5009, GNorm = 0.2255, lr_0 = 6.3831e-04
Loss = 2.6357e-02, PNorm = 137.5502, GNorm = 0.4108, lr_0 = 6.3787e-04
Loss = 2.6211e-02, PNorm = 137.6000, GNorm = 0.2212, lr_0 = 6.3743e-04
Loss = 2.6623e-02, PNorm = 137.6483, GNorm = 0.2627, lr_0 = 6.3700e-04
Loss = 2.8582e-02, PNorm = 137.7009, GNorm = 0.5525, lr_0 = 6.3656e-04
Loss = 3.5728e-02, PNorm = 137.7564, GNorm = 0.2648, lr_0 = 6.3612e-04
Loss = 2.8582e-02, PNorm = 137.8139, GNorm = 0.3690, lr_0 = 6.3569e-04
Loss = 2.9108e-02, PNorm = 137.8714, GNorm = 0.7776, lr_0 = 6.3525e-04
Loss = 3.1279e-02, PNorm = 137.9285, GNorm = 0.4448, lr_0 = 6.3482e-04
Loss = 2.6101e-02, PNorm = 137.9976, GNorm = 0.6197, lr_0 = 6.3438e-04
Loss = 2.7141e-02, PNorm = 138.0551, GNorm = 0.3559, lr_0 = 6.3395e-04
Loss = 2.6779e-02, PNorm = 138.1130, GNorm = 0.6227, lr_0 = 6.3351e-04
Loss = 2.4543e-02, PNorm = 138.1704, GNorm = 0.2590, lr_0 = 6.3308e-04
Loss = 3.0900e-02, PNorm = 138.2240, GNorm = 0.8851, lr_0 = 6.3265e-04
Loss = 3.1042e-02, PNorm = 138.2790, GNorm = 0.2356, lr_0 = 6.3221e-04
Loss = 2.7546e-02, PNorm = 138.3298, GNorm = 1.0698, lr_0 = 6.3178e-04
Loss = 2.9349e-02, PNorm = 138.3917, GNorm = 0.4917, lr_0 = 6.3135e-04
Loss = 3.0720e-02, PNorm = 138.4448, GNorm = 0.3937, lr_0 = 6.3091e-04
Loss = 2.4474e-02, PNorm = 138.4998, GNorm = 0.1878, lr_0 = 6.3048e-04
Loss = 2.6797e-02, PNorm = 138.5530, GNorm = 0.5544, lr_0 = 6.3005e-04
Loss = 2.6319e-02, PNorm = 138.6044, GNorm = 0.3234, lr_0 = 6.2962e-04
Loss = 2.7888e-02, PNorm = 138.6552, GNorm = 0.3248, lr_0 = 6.2919e-04
Loss = 3.1718e-02, PNorm = 138.7105, GNorm = 0.3593, lr_0 = 6.2876e-04
Loss = 2.7180e-02, PNorm = 138.7707, GNorm = 0.6008, lr_0 = 6.2833e-04
Loss = 2.7201e-02, PNorm = 138.8346, GNorm = 0.3936, lr_0 = 6.2789e-04
Loss = 3.0589e-02, PNorm = 138.8936, GNorm = 0.2526, lr_0 = 6.2746e-04
Loss = 2.6477e-02, PNorm = 138.9614, GNorm = 0.5025, lr_0 = 6.2703e-04
Loss = 2.5039e-02, PNorm = 139.0173, GNorm = 0.4570, lr_0 = 6.2661e-04
Loss = 2.9348e-02, PNorm = 139.0729, GNorm = 0.3111, lr_0 = 6.2618e-04
Loss = 2.9121e-02, PNorm = 139.1222, GNorm = 0.3971, lr_0 = 6.2575e-04
Loss = 2.8731e-02, PNorm = 139.1654, GNorm = 0.2996, lr_0 = 6.2532e-04
Loss = 2.7979e-02, PNorm = 139.2223, GNorm = 0.1734, lr_0 = 6.2489e-04
Loss = 2.9135e-02, PNorm = 139.2832, GNorm = 0.5195, lr_0 = 6.2446e-04
Loss = 2.7491e-02, PNorm = 139.3449, GNorm = 0.4453, lr_0 = 6.2403e-04
Loss = 2.8388e-02, PNorm = 139.4044, GNorm = 0.3262, lr_0 = 6.2361e-04
Loss = 3.0521e-02, PNorm = 139.4681, GNorm = 0.4356, lr_0 = 6.2318e-04
Loss = 2.9509e-02, PNorm = 139.5310, GNorm = 0.5475, lr_0 = 6.2275e-04
Loss = 3.1013e-02, PNorm = 139.5929, GNorm = 0.7011, lr_0 = 6.2233e-04
Loss = 3.0603e-02, PNorm = 139.6581, GNorm = 0.2458, lr_0 = 6.2190e-04
Loss = 3.1425e-02, PNorm = 139.7170, GNorm = 0.2356, lr_0 = 6.2147e-04
Loss = 2.9824e-02, PNorm = 139.7807, GNorm = 0.5875, lr_0 = 6.2105e-04
Loss = 2.9989e-02, PNorm = 139.8445, GNorm = 0.2614, lr_0 = 6.2062e-04
Loss = 2.3353e-02, PNorm = 139.9012, GNorm = 0.2951, lr_0 = 6.2020e-04
Loss = 2.7519e-02, PNorm = 139.9638, GNorm = 0.3816, lr_0 = 6.1977e-04
Loss = 2.9581e-02, PNorm = 140.0276, GNorm = 0.4195, lr_0 = 6.1935e-04
Loss = 2.7235e-02, PNorm = 140.0935, GNorm = 0.4545, lr_0 = 6.1892e-04
Loss = 2.9815e-02, PNorm = 140.1540, GNorm = 0.5380, lr_0 = 6.1850e-04
Loss = 3.5470e-02, PNorm = 140.2179, GNorm = 0.6189, lr_0 = 6.1808e-04
Loss = 3.3539e-02, PNorm = 140.2801, GNorm = 0.4362, lr_0 = 6.1765e-04
Loss = 3.0640e-02, PNorm = 140.3433, GNorm = 0.5926, lr_0 = 6.1723e-04
Loss = 2.8953e-02, PNorm = 140.4019, GNorm = 0.5521, lr_0 = 6.1681e-04
Loss = 2.9565e-02, PNorm = 140.4591, GNorm = 0.1562, lr_0 = 6.1638e-04
Loss = 2.6910e-02, PNorm = 140.5232, GNorm = 0.4654, lr_0 = 6.1596e-04
Loss = 3.2097e-02, PNorm = 140.5771, GNorm = 0.4637, lr_0 = 6.1554e-04
Loss = 2.9219e-02, PNorm = 140.6327, GNorm = 0.5360, lr_0 = 6.1512e-04
Loss = 2.7422e-02, PNorm = 140.6867, GNorm = 0.2624, lr_0 = 6.1470e-04
Loss = 2.7311e-02, PNorm = 140.7460, GNorm = 0.2831, lr_0 = 6.1428e-04
Loss = 3.1359e-02, PNorm = 140.8013, GNorm = 0.7105, lr_0 = 6.1385e-04
Loss = 2.7138e-02, PNorm = 140.8647, GNorm = 0.3831, lr_0 = 6.1343e-04
Loss = 2.6234e-02, PNorm = 140.9191, GNorm = 0.3119, lr_0 = 6.1301e-04
Loss = 2.5822e-02, PNorm = 140.9710, GNorm = 0.1801, lr_0 = 6.1259e-04
Loss = 2.9305e-02, PNorm = 141.0262, GNorm = 0.4884, lr_0 = 6.1217e-04
Loss = 2.9300e-02, PNorm = 141.0838, GNorm = 0.2032, lr_0 = 6.1175e-04
Loss = 3.1864e-02, PNorm = 141.1396, GNorm = 0.5708, lr_0 = 6.1134e-04
Loss = 3.0008e-02, PNorm = 141.1991, GNorm = 0.4588, lr_0 = 6.1092e-04
Loss = 3.3603e-02, PNorm = 141.2685, GNorm = 0.3680, lr_0 = 6.1050e-04
Validation mae = 0.284976
Epoch 8
Loss = 2.4252e-02, PNorm = 141.3149, GNorm = 0.4707, lr_0 = 6.1008e-04
Loss = 2.2432e-02, PNorm = 141.3613, GNorm = 0.5789, lr_0 = 6.0966e-04
Loss = 2.4181e-02, PNorm = 141.3991, GNorm = 0.2292, lr_0 = 6.0924e-04
Loss = 2.5463e-02, PNorm = 141.4405, GNorm = 0.9502, lr_0 = 6.0883e-04
Loss = 2.3991e-02, PNorm = 141.4853, GNorm = 0.2099, lr_0 = 6.0841e-04
Loss = 2.3476e-02, PNorm = 141.5260, GNorm = 0.4885, lr_0 = 6.0799e-04
Loss = 1.7496e-02, PNorm = 141.5582, GNorm = 0.2813, lr_0 = 6.0758e-04
Loss = 2.1528e-02, PNorm = 141.5817, GNorm = 0.2052, lr_0 = 6.0716e-04
Loss = 2.1746e-02, PNorm = 141.6103, GNorm = 0.2583, lr_0 = 6.0674e-04
Loss = 2.2279e-02, PNorm = 141.6455, GNorm = 0.3047, lr_0 = 6.0633e-04
Loss = 2.0712e-02, PNorm = 141.6906, GNorm = 0.8755, lr_0 = 6.0591e-04
Loss = 2.1054e-02, PNorm = 141.7279, GNorm = 0.3049, lr_0 = 6.0550e-04
Loss = 2.0695e-02, PNorm = 141.7687, GNorm = 0.5016, lr_0 = 6.0508e-04
Loss = 2.2233e-02, PNorm = 141.8064, GNorm = 0.2950, lr_0 = 6.0467e-04
Loss = 2.1894e-02, PNorm = 141.8424, GNorm = 0.3467, lr_0 = 6.0425e-04
Loss = 2.2832e-02, PNorm = 141.8740, GNorm = 0.4503, lr_0 = 6.0384e-04
Loss = 2.0683e-02, PNorm = 141.9164, GNorm = 0.6516, lr_0 = 6.0343e-04
Loss = 2.0720e-02, PNorm = 141.9532, GNorm = 0.3090, lr_0 = 6.0301e-04
Loss = 2.0465e-02, PNorm = 141.9948, GNorm = 0.1902, lr_0 = 6.0260e-04
Loss = 1.9680e-02, PNorm = 142.0349, GNorm = 0.4138, lr_0 = 6.0219e-04
Loss = 1.7719e-02, PNorm = 142.0701, GNorm = 0.2934, lr_0 = 6.0178e-04
Loss = 1.7634e-02, PNorm = 142.1051, GNorm = 0.2525, lr_0 = 6.0136e-04
Loss = 2.0512e-02, PNorm = 142.1378, GNorm = 0.3800, lr_0 = 6.0095e-04
Loss = 2.4223e-02, PNorm = 142.1715, GNorm = 1.3561, lr_0 = 6.0054e-04
Loss = 1.8510e-02, PNorm = 142.2041, GNorm = 0.1432, lr_0 = 6.0013e-04
Loss = 2.3000e-02, PNorm = 142.2369, GNorm = 0.2135, lr_0 = 5.9972e-04
Loss = 2.1946e-02, PNorm = 142.2754, GNorm = 0.4017, lr_0 = 5.9931e-04
Loss = 2.7568e-02, PNorm = 142.3138, GNorm = 0.1559, lr_0 = 5.9890e-04
Loss = 2.2623e-02, PNorm = 142.3545, GNorm = 0.1768, lr_0 = 5.9849e-04
Loss = 2.2619e-02, PNorm = 142.3946, GNorm = 0.5594, lr_0 = 5.9808e-04
Loss = 1.9242e-02, PNorm = 142.4357, GNorm = 0.3202, lr_0 = 5.9767e-04
Loss = 2.1952e-02, PNorm = 142.4713, GNorm = 0.3239, lr_0 = 5.9726e-04
Loss = 1.9771e-02, PNorm = 142.5118, GNorm = 0.2713, lr_0 = 5.9685e-04
Loss = 2.0886e-02, PNorm = 142.5481, GNorm = 0.2382, lr_0 = 5.9644e-04
Loss = 2.1147e-02, PNorm = 142.5843, GNorm = 0.5790, lr_0 = 5.9603e-04
Loss = 2.2843e-02, PNorm = 142.6200, GNorm = 0.4782, lr_0 = 5.9562e-04
Loss = 2.1938e-02, PNorm = 142.6561, GNorm = 0.2200, lr_0 = 5.9521e-04
Loss = 2.0369e-02, PNorm = 142.6990, GNorm = 0.2962, lr_0 = 5.9481e-04
Loss = 2.0366e-02, PNorm = 142.7397, GNorm = 0.6009, lr_0 = 5.9440e-04
Loss = 1.7508e-02, PNorm = 142.7796, GNorm = 0.2181, lr_0 = 5.9399e-04
Loss = 2.0768e-02, PNorm = 142.8186, GNorm = 0.2864, lr_0 = 5.9358e-04
Loss = 2.2569e-02, PNorm = 142.8542, GNorm = 0.3756, lr_0 = 5.9318e-04
Loss = 1.8751e-02, PNorm = 142.8935, GNorm = 0.2781, lr_0 = 5.9277e-04
Loss = 2.0872e-02, PNorm = 142.9332, GNorm = 0.1986, lr_0 = 5.9236e-04
Loss = 2.3976e-02, PNorm = 142.9766, GNorm = 0.7278, lr_0 = 5.9196e-04
Loss = 2.6978e-02, PNorm = 143.0169, GNorm = 0.5065, lr_0 = 5.9155e-04
Loss = 2.4338e-02, PNorm = 143.0682, GNorm = 0.4277, lr_0 = 5.9115e-04
Loss = 1.8589e-02, PNorm = 143.1140, GNorm = 0.3709, lr_0 = 5.9074e-04
Loss = 1.9450e-02, PNorm = 143.1553, GNorm = 0.3741, lr_0 = 5.9034e-04
Loss = 2.0668e-02, PNorm = 143.1957, GNorm = 0.3061, lr_0 = 5.8993e-04
Loss = 2.0869e-02, PNorm = 143.2382, GNorm = 0.4983, lr_0 = 5.8953e-04
Loss = 1.9202e-02, PNorm = 143.2850, GNorm = 0.4691, lr_0 = 5.8913e-04
Loss = 2.2477e-02, PNorm = 143.3288, GNorm = 0.4202, lr_0 = 5.8872e-04
Loss = 2.0577e-02, PNorm = 143.3701, GNorm = 0.4622, lr_0 = 5.8832e-04
Loss = 1.9576e-02, PNorm = 143.4119, GNorm = 0.4415, lr_0 = 5.8792e-04
Loss = 2.3097e-02, PNorm = 143.4563, GNorm = 0.2256, lr_0 = 5.8751e-04
Loss = 1.9492e-02, PNorm = 143.4993, GNorm = 0.2024, lr_0 = 5.8711e-04
Loss = 2.0195e-02, PNorm = 143.5391, GNorm = 0.2884, lr_0 = 5.8671e-04
Loss = 2.0631e-02, PNorm = 143.5811, GNorm = 0.6412, lr_0 = 5.8631e-04
Loss = 2.0867e-02, PNorm = 143.6216, GNorm = 0.1724, lr_0 = 5.8591e-04
Loss = 2.4380e-02, PNorm = 143.6665, GNorm = 0.4383, lr_0 = 5.8550e-04
Loss = 2.6306e-02, PNorm = 143.7109, GNorm = 0.2736, lr_0 = 5.8510e-04
Loss = 2.2983e-02, PNorm = 143.7544, GNorm = 0.4607, lr_0 = 5.8470e-04
Loss = 2.0620e-02, PNorm = 143.7988, GNorm = 0.2871, lr_0 = 5.8430e-04
Loss = 1.9529e-02, PNorm = 143.8377, GNorm = 0.3271, lr_0 = 5.8390e-04
Loss = 2.4719e-02, PNorm = 143.8785, GNorm = 0.2818, lr_0 = 5.8350e-04
Loss = 1.9249e-02, PNorm = 143.9242, GNorm = 0.3019, lr_0 = 5.8310e-04
Loss = 2.1142e-02, PNorm = 143.9649, GNorm = 0.1518, lr_0 = 5.8270e-04
Loss = 2.0295e-02, PNorm = 144.0060, GNorm = 0.1543, lr_0 = 5.8230e-04
Loss = 2.5808e-02, PNorm = 144.0471, GNorm = 0.3101, lr_0 = 5.8190e-04
Loss = 2.0943e-02, PNorm = 144.0909, GNorm = 0.4694, lr_0 = 5.8151e-04
Loss = 1.8495e-02, PNorm = 144.1311, GNorm = 0.1859, lr_0 = 5.8111e-04
Loss = 2.5855e-02, PNorm = 144.1757, GNorm = 0.4017, lr_0 = 5.8071e-04
Loss = 1.8884e-02, PNorm = 144.2186, GNorm = 0.1945, lr_0 = 5.8031e-04
Loss = 1.9599e-02, PNorm = 144.2662, GNorm = 0.4634, lr_0 = 5.7991e-04
Loss = 2.0444e-02, PNorm = 144.3138, GNorm = 0.1704, lr_0 = 5.7952e-04
Loss = 2.2092e-02, PNorm = 144.3602, GNorm = 0.3512, lr_0 = 5.7912e-04
Loss = 2.0797e-02, PNorm = 144.4027, GNorm = 0.2910, lr_0 = 5.7872e-04
Loss = 1.9287e-02, PNorm = 144.4494, GNorm = 0.4200, lr_0 = 5.7833e-04
Loss = 1.9328e-02, PNorm = 144.4891, GNorm = 0.4774, lr_0 = 5.7793e-04
Loss = 1.9241e-02, PNorm = 144.5281, GNorm = 0.7496, lr_0 = 5.7753e-04
Loss = 2.2983e-02, PNorm = 144.5689, GNorm = 0.6411, lr_0 = 5.7714e-04
Loss = 2.1212e-02, PNorm = 144.6134, GNorm = 0.3695, lr_0 = 5.7674e-04
Loss = 2.4019e-02, PNorm = 144.6550, GNorm = 0.2104, lr_0 = 5.7635e-04
Loss = 2.6320e-02, PNorm = 144.7045, GNorm = 0.2085, lr_0 = 5.7595e-04
Loss = 2.1248e-02, PNorm = 144.7504, GNorm = 0.2354, lr_0 = 5.7556e-04
Loss = 2.5291e-02, PNorm = 144.7973, GNorm = 0.7633, lr_0 = 5.7516e-04
Loss = 2.6548e-02, PNorm = 144.8467, GNorm = 0.5330, lr_0 = 5.7477e-04
Loss = 2.5563e-02, PNorm = 144.8957, GNorm = 0.3128, lr_0 = 5.7438e-04
Loss = 2.6493e-02, PNorm = 144.9437, GNorm = 0.2208, lr_0 = 5.7398e-04
Loss = 2.5285e-02, PNorm = 144.9913, GNorm = 0.5816, lr_0 = 5.7359e-04
Loss = 2.4619e-02, PNorm = 145.0427, GNorm = 0.2227, lr_0 = 5.7320e-04
Loss = 2.0514e-02, PNorm = 145.0928, GNorm = 0.4443, lr_0 = 5.7280e-04
Loss = 1.9867e-02, PNorm = 145.1384, GNorm = 0.1748, lr_0 = 5.7241e-04
Loss = 2.6122e-02, PNorm = 145.1858, GNorm = 0.4216, lr_0 = 5.7202e-04
Loss = 2.1305e-02, PNorm = 145.2305, GNorm = 0.3093, lr_0 = 5.7163e-04
Loss = 2.2099e-02, PNorm = 145.2821, GNorm = 0.3427, lr_0 = 5.7124e-04
Loss = 2.2737e-02, PNorm = 145.3347, GNorm = 0.1975, lr_0 = 5.7084e-04
Loss = 2.7935e-02, PNorm = 145.3907, GNorm = 0.7785, lr_0 = 5.7045e-04
Loss = 2.1819e-02, PNorm = 145.4465, GNorm = 0.1515, lr_0 = 5.7006e-04
Loss = 2.5928e-02, PNorm = 145.4988, GNorm = 0.6286, lr_0 = 5.6967e-04
Loss = 1.7929e-02, PNorm = 145.5515, GNorm = 0.2276, lr_0 = 5.6928e-04
Loss = 2.3676e-02, PNorm = 145.5990, GNorm = 0.3949, lr_0 = 5.6889e-04
Loss = 2.1773e-02, PNorm = 145.6451, GNorm = 0.3812, lr_0 = 5.6850e-04
Loss = 2.1020e-02, PNorm = 145.6969, GNorm = 0.5031, lr_0 = 5.6811e-04
Loss = 2.0424e-02, PNorm = 145.7463, GNorm = 0.3597, lr_0 = 5.6772e-04
Loss = 1.9367e-02, PNorm = 145.7947, GNorm = 0.4218, lr_0 = 5.6733e-04
Loss = 2.0292e-02, PNorm = 145.8374, GNorm = 0.5587, lr_0 = 5.6695e-04
Loss = 2.5614e-02, PNorm = 145.8830, GNorm = 0.4737, lr_0 = 5.6656e-04
Loss = 2.4033e-02, PNorm = 145.9340, GNorm = 0.6200, lr_0 = 5.6617e-04
Loss = 2.2398e-02, PNorm = 145.9915, GNorm = 0.4521, lr_0 = 5.6578e-04
Loss = 2.3507e-02, PNorm = 146.0408, GNorm = 0.2156, lr_0 = 5.6539e-04
Loss = 2.3543e-02, PNorm = 146.0953, GNorm = 0.2107, lr_0 = 5.6501e-04
Loss = 2.1182e-02, PNorm = 146.1436, GNorm = 0.5094, lr_0 = 5.6462e-04
Loss = 2.3689e-02, PNorm = 146.2066, GNorm = 0.2162, lr_0 = 5.6423e-04
Loss = 2.3049e-02, PNorm = 146.2611, GNorm = 0.3913, lr_0 = 5.6385e-04
Loss = 2.2146e-02, PNorm = 146.3102, GNorm = 0.2470, lr_0 = 5.6346e-04
Loss = 2.3524e-02, PNorm = 146.3619, GNorm = 0.9041, lr_0 = 5.6307e-04
Loss = 2.0550e-02, PNorm = 146.4109, GNorm = 0.4879, lr_0 = 5.6269e-04
Loss = 2.2387e-02, PNorm = 146.4555, GNorm = 0.5730, lr_0 = 5.6230e-04
Validation mae = 0.284255
Epoch 9
Loss = 2.0297e-02, PNorm = 146.4947, GNorm = 0.3073, lr_0 = 5.6192e-04
Loss = 2.4930e-02, PNorm = 146.5353, GNorm = 0.4029, lr_0 = 5.6153e-04
Loss = 1.9096e-02, PNorm = 146.5707, GNorm = 0.7316, lr_0 = 5.6115e-04
Loss = 1.7413e-02, PNorm = 146.6067, GNorm = 0.7156, lr_0 = 5.6076e-04
Loss = 1.7023e-02, PNorm = 146.6375, GNorm = 0.3881, lr_0 = 5.6038e-04
Loss = 1.9645e-02, PNorm = 146.6696, GNorm = 0.9087, lr_0 = 5.6000e-04
Loss = 1.6812e-02, PNorm = 146.6940, GNorm = 0.1842, lr_0 = 5.5961e-04
Loss = 1.7212e-02, PNorm = 146.7304, GNorm = 0.2881, lr_0 = 5.5923e-04
Loss = 1.7448e-02, PNorm = 146.7688, GNorm = 0.3546, lr_0 = 5.5885e-04
Loss = 2.0016e-02, PNorm = 146.8060, GNorm = 0.4056, lr_0 = 5.5846e-04
Loss = 1.9048e-02, PNorm = 146.8476, GNorm = 0.2610, lr_0 = 5.5808e-04
Loss = 1.9374e-02, PNorm = 146.8851, GNorm = 0.7280, lr_0 = 5.5770e-04
Loss = 2.4936e-02, PNorm = 146.9185, GNorm = 0.7230, lr_0 = 5.5732e-04
Loss = 1.6246e-02, PNorm = 146.9562, GNorm = 0.1101, lr_0 = 5.5693e-04
Loss = 2.2161e-02, PNorm = 146.9899, GNorm = 0.1789, lr_0 = 5.5655e-04
Loss = 1.8660e-02, PNorm = 147.0234, GNorm = 0.2168, lr_0 = 5.5617e-04
Loss = 1.7064e-02, PNorm = 147.0537, GNorm = 0.6577, lr_0 = 5.5579e-04
Loss = 2.0961e-02, PNorm = 147.0918, GNorm = 0.3807, lr_0 = 5.5541e-04
Loss = 1.9939e-02, PNorm = 147.1268, GNorm = 0.5782, lr_0 = 5.5503e-04
Loss = 2.2602e-02, PNorm = 147.1656, GNorm = 0.3171, lr_0 = 5.5465e-04
Loss = 1.8047e-02, PNorm = 147.2017, GNorm = 0.1979, lr_0 = 5.5427e-04
Loss = 1.6128e-02, PNorm = 147.2367, GNorm = 0.2609, lr_0 = 5.5389e-04
Loss = 1.8024e-02, PNorm = 147.2704, GNorm = 0.3047, lr_0 = 5.5351e-04
Loss = 1.4664e-02, PNorm = 147.3050, GNorm = 0.3929, lr_0 = 5.5313e-04
Loss = 1.8096e-02, PNorm = 147.3365, GNorm = 0.5004, lr_0 = 5.5275e-04
Loss = 2.0496e-02, PNorm = 147.3671, GNorm = 0.5554, lr_0 = 5.5237e-04
Loss = 1.5103e-02, PNorm = 147.3999, GNorm = 0.2473, lr_0 = 5.5199e-04
Loss = 1.4409e-02, PNorm = 147.4306, GNorm = 0.4376, lr_0 = 5.5162e-04
Loss = 1.4332e-02, PNorm = 147.4618, GNorm = 0.3472, lr_0 = 5.5124e-04
Loss = 1.7106e-02, PNorm = 147.4960, GNorm = 0.6797, lr_0 = 5.5086e-04
Loss = 1.6995e-02, PNorm = 147.5227, GNorm = 0.8098, lr_0 = 5.5048e-04
Loss = 1.7331e-02, PNorm = 147.5552, GNorm = 0.1538, lr_0 = 5.5011e-04
Loss = 1.9427e-02, PNorm = 147.5885, GNorm = 0.3608, lr_0 = 5.4973e-04
Loss = 1.9582e-02, PNorm = 147.6226, GNorm = 0.3952, lr_0 = 5.4935e-04
Loss = 1.7917e-02, PNorm = 147.6612, GNorm = 0.3901, lr_0 = 5.4898e-04
Loss = 1.4959e-02, PNorm = 147.6970, GNorm = 0.2427, lr_0 = 5.4860e-04
Loss = 1.4836e-02, PNorm = 147.7316, GNorm = 0.4624, lr_0 = 5.4822e-04
Loss = 1.4683e-02, PNorm = 147.7590, GNorm = 0.2814, lr_0 = 5.4785e-04
Loss = 1.7189e-02, PNorm = 147.7872, GNorm = 0.3923, lr_0 = 5.4747e-04
Loss = 1.6790e-02, PNorm = 147.8210, GNorm = 0.4712, lr_0 = 5.4710e-04
Loss = 1.9132e-02, PNorm = 147.8568, GNorm = 0.2166, lr_0 = 5.4672e-04
Loss = 1.5362e-02, PNorm = 147.8917, GNorm = 0.5174, lr_0 = 5.4635e-04
Loss = 1.4540e-02, PNorm = 147.9267, GNorm = 0.3298, lr_0 = 5.4597e-04
Loss = 1.8258e-02, PNorm = 147.9578, GNorm = 0.2808, lr_0 = 5.4560e-04
Loss = 1.6751e-02, PNorm = 147.9927, GNorm = 0.3805, lr_0 = 5.4523e-04
Loss = 1.6874e-02, PNorm = 148.0290, GNorm = 0.1659, lr_0 = 5.4485e-04
Loss = 1.6980e-02, PNorm = 148.0610, GNorm = 0.3233, lr_0 = 5.4448e-04
Loss = 1.2896e-02, PNorm = 148.0891, GNorm = 0.2131, lr_0 = 5.4411e-04
Loss = 1.6217e-02, PNorm = 148.1218, GNorm = 0.3652, lr_0 = 5.4373e-04
Loss = 1.3721e-02, PNorm = 148.1563, GNorm = 0.3156, lr_0 = 5.4336e-04
Loss = 1.7969e-02, PNorm = 148.1904, GNorm = 0.4340, lr_0 = 5.4299e-04
Loss = 1.6640e-02, PNorm = 148.2237, GNorm = 0.4869, lr_0 = 5.4262e-04
Loss = 1.6502e-02, PNorm = 148.2579, GNorm = 0.3231, lr_0 = 5.4225e-04
Loss = 1.4907e-02, PNorm = 148.2931, GNorm = 0.5182, lr_0 = 5.4187e-04
Loss = 1.9885e-02, PNorm = 148.3290, GNorm = 0.6407, lr_0 = 5.4150e-04
Loss = 1.7716e-02, PNorm = 148.3672, GNorm = 0.2592, lr_0 = 5.4113e-04
Loss = 1.4405e-02, PNorm = 148.4018, GNorm = 0.1683, lr_0 = 5.4076e-04
Loss = 1.5204e-02, PNorm = 148.4317, GNorm = 0.3218, lr_0 = 5.4039e-04
Loss = 1.4175e-02, PNorm = 148.4667, GNorm = 0.1246, lr_0 = 5.4002e-04
Loss = 2.1779e-02, PNorm = 148.5054, GNorm = 0.3844, lr_0 = 5.3965e-04
Loss = 1.7269e-02, PNorm = 148.5466, GNorm = 0.4186, lr_0 = 5.3928e-04
Loss = 1.5193e-02, PNorm = 148.5811, GNorm = 0.1616, lr_0 = 5.3891e-04
Loss = 1.9415e-02, PNorm = 148.6172, GNorm = 0.1847, lr_0 = 5.3854e-04
Loss = 1.5281e-02, PNorm = 148.6544, GNorm = 0.1925, lr_0 = 5.3817e-04
Loss = 2.0465e-02, PNorm = 148.6851, GNorm = 0.5017, lr_0 = 5.3781e-04
Loss = 1.4815e-02, PNorm = 148.7176, GNorm = 0.8610, lr_0 = 5.3744e-04
Loss = 1.6007e-02, PNorm = 148.7486, GNorm = 0.4040, lr_0 = 5.3707e-04
Loss = 1.8230e-02, PNorm = 148.7889, GNorm = 0.6767, lr_0 = 5.3670e-04
Loss = 2.1557e-02, PNorm = 148.8291, GNorm = 0.3855, lr_0 = 5.3633e-04
Loss = 1.3728e-02, PNorm = 148.8683, GNorm = 0.1327, lr_0 = 5.3597e-04
Loss = 2.0423e-02, PNorm = 148.9016, GNorm = 0.1954, lr_0 = 5.3560e-04
Loss = 1.6313e-02, PNorm = 148.9345, GNorm = 0.3206, lr_0 = 5.3523e-04
Loss = 1.5338e-02, PNorm = 148.9681, GNorm = 0.1675, lr_0 = 5.3486e-04
Loss = 1.2889e-02, PNorm = 149.0003, GNorm = 0.3982, lr_0 = 5.3450e-04
Loss = 1.8635e-02, PNorm = 149.0319, GNorm = 0.8719, lr_0 = 5.3413e-04
Loss = 1.9668e-02, PNorm = 149.0644, GNorm = 0.2331, lr_0 = 5.3377e-04
Loss = 1.6613e-02, PNorm = 149.1067, GNorm = 0.3033, lr_0 = 5.3340e-04
Loss = 2.2347e-02, PNorm = 149.1435, GNorm = 0.7420, lr_0 = 5.3304e-04
Loss = 1.8826e-02, PNorm = 149.1867, GNorm = 0.4367, lr_0 = 5.3267e-04
Loss = 1.6065e-02, PNorm = 149.2269, GNorm = 0.6323, lr_0 = 5.3231e-04
Loss = 1.6313e-02, PNorm = 149.2659, GNorm = 0.2102, lr_0 = 5.3194e-04
Loss = 1.7533e-02, PNorm = 149.3022, GNorm = 0.3270, lr_0 = 5.3158e-04
Loss = 1.5875e-02, PNorm = 149.3421, GNorm = 0.4244, lr_0 = 5.3121e-04
Loss = 2.4077e-02, PNorm = 149.3787, GNorm = 0.5753, lr_0 = 5.3085e-04
Loss = 1.6487e-02, PNorm = 149.4226, GNorm = 0.1762, lr_0 = 5.3048e-04
Loss = 2.0866e-02, PNorm = 149.4608, GNorm = 0.1864, lr_0 = 5.3012e-04
Loss = 1.8865e-02, PNorm = 149.5025, GNorm = 0.2837, lr_0 = 5.2976e-04
Loss = 1.8439e-02, PNorm = 149.5443, GNorm = 0.4701, lr_0 = 5.2939e-04
Loss = 1.7899e-02, PNorm = 149.5819, GNorm = 0.3066, lr_0 = 5.2903e-04
Loss = 1.9177e-02, PNorm = 149.6189, GNorm = 0.3560, lr_0 = 5.2867e-04
Loss = 1.8555e-02, PNorm = 149.6500, GNorm = 0.4860, lr_0 = 5.2831e-04
Loss = 1.8447e-02, PNorm = 149.6873, GNorm = 0.2007, lr_0 = 5.2795e-04
Loss = 2.0240e-02, PNorm = 149.7292, GNorm = 0.2200, lr_0 = 5.2758e-04
Loss = 1.4837e-02, PNorm = 149.7713, GNorm = 0.2179, lr_0 = 5.2722e-04
Loss = 1.5964e-02, PNorm = 149.8081, GNorm = 0.2941, lr_0 = 5.2686e-04
Loss = 1.6489e-02, PNorm = 149.8409, GNorm = 0.4265, lr_0 = 5.2650e-04
Loss = 1.9774e-02, PNorm = 149.8773, GNorm = 0.5450, lr_0 = 5.2614e-04
Loss = 1.7177e-02, PNorm = 149.9152, GNorm = 0.1430, lr_0 = 5.2578e-04
Loss = 1.5679e-02, PNorm = 149.9549, GNorm = 0.3439, lr_0 = 5.2542e-04
Loss = 1.5019e-02, PNorm = 149.9923, GNorm = 0.4139, lr_0 = 5.2506e-04
Loss = 1.6939e-02, PNorm = 150.0322, GNorm = 0.2480, lr_0 = 5.2470e-04
Loss = 1.9858e-02, PNorm = 150.0767, GNorm = 0.2699, lr_0 = 5.2434e-04
Loss = 1.6392e-02, PNorm = 150.1238, GNorm = 0.2491, lr_0 = 5.2398e-04
Loss = 1.5839e-02, PNorm = 150.1620, GNorm = 0.4681, lr_0 = 5.2362e-04
Loss = 1.7096e-02, PNorm = 150.1980, GNorm = 0.2610, lr_0 = 5.2326e-04
Loss = 1.5721e-02, PNorm = 150.2350, GNorm = 0.1866, lr_0 = 5.2290e-04
Loss = 1.7610e-02, PNorm = 150.2733, GNorm = 0.5369, lr_0 = 5.2255e-04
Loss = 2.0787e-02, PNorm = 150.3125, GNorm = 0.7257, lr_0 = 5.2219e-04
Loss = 1.7962e-02, PNorm = 150.3546, GNorm = 0.5080, lr_0 = 5.2183e-04
Loss = 1.5913e-02, PNorm = 150.3967, GNorm = 0.3238, lr_0 = 5.2147e-04
Loss = 1.7738e-02, PNorm = 150.4319, GNorm = 0.5741, lr_0 = 5.2112e-04
Loss = 2.1709e-02, PNorm = 150.4699, GNorm = 0.2285, lr_0 = 5.2076e-04
Loss = 1.8764e-02, PNorm = 150.5139, GNorm = 0.4386, lr_0 = 5.2040e-04
Loss = 1.7757e-02, PNorm = 150.5588, GNorm = 0.4884, lr_0 = 5.2005e-04
Loss = 2.0698e-02, PNorm = 150.5988, GNorm = 0.8069, lr_0 = 5.1969e-04
Loss = 1.8474e-02, PNorm = 150.6431, GNorm = 0.2754, lr_0 = 5.1933e-04
Loss = 1.8040e-02, PNorm = 150.6863, GNorm = 0.4322, lr_0 = 5.1898e-04
Loss = 1.6533e-02, PNorm = 150.7297, GNorm = 0.7446, lr_0 = 5.1862e-04
Loss = 1.8139e-02, PNorm = 150.7674, GNorm = 0.1778, lr_0 = 5.1827e-04
Loss = 1.5978e-02, PNorm = 150.8074, GNorm = 0.2792, lr_0 = 5.1791e-04
Validation mae = 0.283111
Epoch 10
Loss = 1.4879e-02, PNorm = 150.8428, GNorm = 0.1919, lr_0 = 5.1756e-04
Loss = 1.7640e-02, PNorm = 150.8706, GNorm = 0.2569, lr_0 = 5.1720e-04
Loss = 1.3255e-02, PNorm = 150.8994, GNorm = 0.5432, lr_0 = 5.1685e-04
Loss = 1.5324e-02, PNorm = 150.9225, GNorm = 0.2968, lr_0 = 5.1649e-04
Loss = 1.5080e-02, PNorm = 150.9499, GNorm = 0.3737, lr_0 = 5.1614e-04
Loss = 1.3656e-02, PNorm = 150.9769, GNorm = 0.3098, lr_0 = 5.1579e-04
Loss = 1.2985e-02, PNorm = 151.0061, GNorm = 0.2301, lr_0 = 5.1543e-04
Loss = 1.3873e-02, PNorm = 151.0291, GNorm = 0.3190, lr_0 = 5.1508e-04
Loss = 1.7055e-02, PNorm = 151.0549, GNorm = 0.8434, lr_0 = 5.1473e-04
Loss = 1.4484e-02, PNorm = 151.0814, GNorm = 0.2082, lr_0 = 5.1437e-04
Loss = 1.3430e-02, PNorm = 151.1094, GNorm = 0.3274, lr_0 = 5.1402e-04
Loss = 1.4890e-02, PNorm = 151.1376, GNorm = 0.7617, lr_0 = 5.1367e-04
Loss = 1.3765e-02, PNorm = 151.1646, GNorm = 0.3187, lr_0 = 5.1332e-04
Loss = 1.4375e-02, PNorm = 151.1941, GNorm = 0.2651, lr_0 = 5.1297e-04
Loss = 1.3991e-02, PNorm = 151.2215, GNorm = 0.3048, lr_0 = 5.1262e-04
Loss = 1.5570e-02, PNorm = 151.2518, GNorm = 0.2108, lr_0 = 5.1226e-04
Loss = 1.8371e-02, PNorm = 151.2747, GNorm = 0.7219, lr_0 = 5.1191e-04
Loss = 1.3742e-02, PNorm = 151.2982, GNorm = 0.4367, lr_0 = 5.1156e-04
Loss = 1.5907e-02, PNorm = 151.3267, GNorm = 0.2296, lr_0 = 5.1121e-04
Loss = 1.4275e-02, PNorm = 151.3554, GNorm = 0.3097, lr_0 = 5.1086e-04
Loss = 1.5408e-02, PNorm = 151.3845, GNorm = 0.1128, lr_0 = 5.1051e-04
Loss = 1.3960e-02, PNorm = 151.4179, GNorm = 0.6723, lr_0 = 5.1016e-04
Loss = 1.4159e-02, PNorm = 151.4473, GNorm = 0.1254, lr_0 = 5.0981e-04
Loss = 1.3456e-02, PNorm = 151.4761, GNorm = 0.3480, lr_0 = 5.0946e-04
Loss = 1.5134e-02, PNorm = 151.5007, GNorm = 0.2996, lr_0 = 5.0911e-04
Loss = 1.0526e-02, PNorm = 151.5278, GNorm = 0.4016, lr_0 = 5.0877e-04
Loss = 1.2525e-02, PNorm = 151.5515, GNorm = 0.2458, lr_0 = 5.0842e-04
Loss = 1.4978e-02, PNorm = 151.5790, GNorm = 0.3011, lr_0 = 5.0807e-04
Loss = 1.2378e-02, PNorm = 151.6042, GNorm = 0.3225, lr_0 = 5.0772e-04
Loss = 1.6293e-02, PNorm = 151.6344, GNorm = 0.4241, lr_0 = 5.0737e-04
Loss = 1.3617e-02, PNorm = 151.6640, GNorm = 0.2184, lr_0 = 5.0703e-04
Loss = 1.3321e-02, PNorm = 151.6969, GNorm = 0.2107, lr_0 = 5.0668e-04
Loss = 1.4016e-02, PNorm = 151.7256, GNorm = 0.1549, lr_0 = 5.0633e-04
Loss = 1.3004e-02, PNorm = 151.7507, GNorm = 0.3275, lr_0 = 5.0598e-04
Loss = 1.8214e-02, PNorm = 151.7765, GNorm = 0.2124, lr_0 = 5.0564e-04
Loss = 1.3477e-02, PNorm = 151.8030, GNorm = 0.2160, lr_0 = 5.0529e-04
Loss = 1.2574e-02, PNorm = 151.8315, GNorm = 0.1532, lr_0 = 5.0494e-04
Loss = 1.3351e-02, PNorm = 151.8580, GNorm = 0.1546, lr_0 = 5.0460e-04
Loss = 1.2472e-02, PNorm = 151.8853, GNorm = 0.3287, lr_0 = 5.0425e-04
Loss = 1.4258e-02, PNorm = 151.9157, GNorm = 0.2112, lr_0 = 5.0391e-04
Loss = 1.2674e-02, PNorm = 151.9453, GNorm = 0.1399, lr_0 = 5.0356e-04
Loss = 1.2026e-02, PNorm = 151.9701, GNorm = 0.1305, lr_0 = 5.0322e-04
Loss = 1.2859e-02, PNorm = 151.9964, GNorm = 0.3292, lr_0 = 5.0287e-04
Loss = 1.2591e-02, PNorm = 152.0239, GNorm = 0.6063, lr_0 = 5.0253e-04
Loss = 1.1412e-02, PNorm = 152.0541, GNorm = 0.1402, lr_0 = 5.0218e-04
Loss = 1.3600e-02, PNorm = 152.0805, GNorm = 0.5000, lr_0 = 5.0184e-04
Loss = 1.1334e-02, PNorm = 152.1060, GNorm = 0.3381, lr_0 = 5.0150e-04
Loss = 1.3463e-02, PNorm = 152.1253, GNorm = 0.5284, lr_0 = 5.0115e-04
Loss = 1.6513e-02, PNorm = 152.1503, GNorm = 0.3310, lr_0 = 5.0081e-04
Loss = 1.3350e-02, PNorm = 152.1783, GNorm = 0.2964, lr_0 = 5.0047e-04
Loss = 1.3947e-02, PNorm = 152.2038, GNorm = 0.1411, lr_0 = 5.0012e-04
Loss = 1.4326e-02, PNorm = 152.2305, GNorm = 0.2805, lr_0 = 4.9978e-04
Loss = 1.3198e-02, PNorm = 152.2602, GNorm = 0.5898, lr_0 = 4.9944e-04
Loss = 1.3485e-02, PNorm = 152.2931, GNorm = 0.5995, lr_0 = 4.9910e-04
Loss = 1.4280e-02, PNorm = 152.3239, GNorm = 0.2533, lr_0 = 4.9875e-04
Loss = 1.4728e-02, PNorm = 152.3548, GNorm = 0.3951, lr_0 = 4.9841e-04
Loss = 1.1868e-02, PNorm = 152.3847, GNorm = 0.1990, lr_0 = 4.9807e-04
Loss = 1.4766e-02, PNorm = 152.4131, GNorm = 0.2752, lr_0 = 4.9773e-04
Loss = 1.4555e-02, PNorm = 152.4421, GNorm = 0.2433, lr_0 = 4.9739e-04
Loss = 1.7156e-02, PNorm = 152.4729, GNorm = 0.5622, lr_0 = 4.9705e-04
Loss = 1.4802e-02, PNorm = 152.5044, GNorm = 0.2652, lr_0 = 4.9671e-04
Loss = 1.2351e-02, PNorm = 152.5339, GNorm = 0.3540, lr_0 = 4.9637e-04
Loss = 1.6502e-02, PNorm = 152.5628, GNorm = 0.2759, lr_0 = 4.9603e-04
Loss = 1.5716e-02, PNorm = 152.5977, GNorm = 0.8368, lr_0 = 4.9569e-04
Loss = 1.3598e-02, PNorm = 152.6307, GNorm = 0.1991, lr_0 = 4.9535e-04
Loss = 1.3835e-02, PNorm = 152.6642, GNorm = 0.3189, lr_0 = 4.9501e-04
Loss = 1.5294e-02, PNorm = 152.6954, GNorm = 0.5127, lr_0 = 4.9467e-04
Loss = 1.1722e-02, PNorm = 152.7291, GNorm = 0.6114, lr_0 = 4.9433e-04
Loss = 1.3022e-02, PNorm = 152.7591, GNorm = 0.2753, lr_0 = 4.9399e-04
Loss = 1.7119e-02, PNorm = 152.7930, GNorm = 0.4260, lr_0 = 4.9365e-04
Loss = 1.6284e-02, PNorm = 152.8260, GNorm = 0.2843, lr_0 = 4.9332e-04
Loss = 1.2640e-02, PNorm = 152.8595, GNorm = 0.1853, lr_0 = 4.9298e-04
Loss = 1.6340e-02, PNorm = 152.8910, GNorm = 0.7282, lr_0 = 4.9264e-04
Loss = 1.3504e-02, PNorm = 152.9238, GNorm = 0.2088, lr_0 = 4.9230e-04
Loss = 1.3135e-02, PNorm = 152.9576, GNorm = 0.2284, lr_0 = 4.9197e-04
Loss = 1.5135e-02, PNorm = 152.9910, GNorm = 0.1967, lr_0 = 4.9163e-04
Loss = 1.4771e-02, PNorm = 153.0229, GNorm = 0.2130, lr_0 = 4.9129e-04
Loss = 1.3748e-02, PNorm = 153.0541, GNorm = 0.6601, lr_0 = 4.9095e-04
Loss = 1.3148e-02, PNorm = 153.0904, GNorm = 0.3482, lr_0 = 4.9062e-04
Loss = 1.9908e-02, PNorm = 153.1235, GNorm = 0.2358, lr_0 = 4.9028e-04
Loss = 1.3871e-02, PNorm = 153.1567, GNorm = 0.6212, lr_0 = 4.8995e-04
Loss = 1.3582e-02, PNorm = 153.1847, GNorm = 0.3268, lr_0 = 4.8961e-04
Loss = 1.5174e-02, PNorm = 153.2129, GNorm = 0.3730, lr_0 = 4.8928e-04
Loss = 1.6362e-02, PNorm = 153.2434, GNorm = 0.2653, lr_0 = 4.8894e-04
Loss = 1.3573e-02, PNorm = 153.2741, GNorm = 0.2895, lr_0 = 4.8861e-04
Loss = 1.3465e-02, PNorm = 153.3070, GNorm = 0.2195, lr_0 = 4.8827e-04
Loss = 1.2012e-02, PNorm = 153.3381, GNorm = 0.1513, lr_0 = 4.8794e-04
Loss = 1.5634e-02, PNorm = 153.3715, GNorm = 0.4488, lr_0 = 4.8760e-04
Loss = 1.3688e-02, PNorm = 153.4034, GNorm = 0.2696, lr_0 = 4.8727e-04
Loss = 1.3733e-02, PNorm = 153.4426, GNorm = 0.3564, lr_0 = 4.8693e-04
Loss = 1.7370e-02, PNorm = 153.4726, GNorm = 0.7438, lr_0 = 4.8660e-04
Loss = 1.3659e-02, PNorm = 153.5060, GNorm = 0.2672, lr_0 = 4.8627e-04
Loss = 1.1236e-02, PNorm = 153.5386, GNorm = 0.1994, lr_0 = 4.8593e-04
Loss = 1.7266e-02, PNorm = 153.5704, GNorm = 0.1981, lr_0 = 4.8560e-04
Loss = 1.6849e-02, PNorm = 153.6034, GNorm = 0.2239, lr_0 = 4.8527e-04
Loss = 1.2398e-02, PNorm = 153.6355, GNorm = 0.2989, lr_0 = 4.8494e-04
Loss = 1.3633e-02, PNorm = 153.6680, GNorm = 0.4581, lr_0 = 4.8460e-04
Loss = 1.2133e-02, PNorm = 153.6952, GNorm = 0.4659, lr_0 = 4.8427e-04
Loss = 1.3305e-02, PNorm = 153.7237, GNorm = 0.4308, lr_0 = 4.8394e-04
Loss = 1.8349e-02, PNorm = 153.7599, GNorm = 0.2424, lr_0 = 4.8361e-04
Loss = 1.4709e-02, PNorm = 153.7932, GNorm = 0.3253, lr_0 = 4.8328e-04
Loss = 1.6897e-02, PNorm = 153.8289, GNorm = 0.5826, lr_0 = 4.8295e-04
Loss = 1.3713e-02, PNorm = 153.8616, GNorm = 0.1887, lr_0 = 4.8262e-04
Loss = 1.6867e-02, PNorm = 153.9009, GNorm = 0.4253, lr_0 = 4.8228e-04
Loss = 1.5486e-02, PNorm = 153.9402, GNorm = 0.3510, lr_0 = 4.8195e-04
Loss = 1.6067e-02, PNorm = 153.9741, GNorm = 0.7649, lr_0 = 4.8162e-04
Loss = 1.3343e-02, PNorm = 154.0018, GNorm = 0.2340, lr_0 = 4.8129e-04
Loss = 1.6226e-02, PNorm = 154.0306, GNorm = 0.2091, lr_0 = 4.8096e-04
Loss = 1.5604e-02, PNorm = 154.0631, GNorm = 0.4532, lr_0 = 4.8064e-04
Loss = 1.5442e-02, PNorm = 154.0975, GNorm = 0.6237, lr_0 = 4.8031e-04
Loss = 1.5196e-02, PNorm = 154.1375, GNorm = 0.1993, lr_0 = 4.7998e-04
Loss = 1.3288e-02, PNorm = 154.1740, GNorm = 0.2571, lr_0 = 4.7965e-04
Loss = 1.7205e-02, PNorm = 154.2102, GNorm = 0.1770, lr_0 = 4.7932e-04
Loss = 1.9716e-02, PNorm = 154.2513, GNorm = 0.2507, lr_0 = 4.7899e-04
Loss = 1.4543e-02, PNorm = 154.2832, GNorm = 0.1574, lr_0 = 4.7866e-04
Loss = 1.4935e-02, PNorm = 154.3147, GNorm = 0.1227, lr_0 = 4.7833e-04
Loss = 1.4565e-02, PNorm = 154.3422, GNorm = 0.2376, lr_0 = 4.7801e-04
Loss = 1.4052e-02, PNorm = 154.3718, GNorm = 0.4069, lr_0 = 4.7768e-04
Loss = 1.4940e-02, PNorm = 154.3983, GNorm = 0.8664, lr_0 = 4.7735e-04
Loss = 1.6483e-02, PNorm = 154.4278, GNorm = 1.0071, lr_0 = 4.7703e-04
Validation mae = 0.281846
Epoch 11
Loss = 1.1481e-02, PNorm = 154.4564, GNorm = 0.2220, lr_0 = 4.7670e-04
Loss = 1.3556e-02, PNorm = 154.4808, GNorm = 0.5341, lr_0 = 4.7637e-04
Loss = 1.2006e-02, PNorm = 154.5039, GNorm = 0.3094, lr_0 = 4.7605e-04
Loss = 1.2276e-02, PNorm = 154.5269, GNorm = 0.2642, lr_0 = 4.7572e-04
Loss = 1.1956e-02, PNorm = 154.5462, GNorm = 0.5164, lr_0 = 4.7539e-04
Loss = 1.3033e-02, PNorm = 154.5692, GNorm = 0.3105, lr_0 = 4.7507e-04
Loss = 1.2215e-02, PNorm = 154.5965, GNorm = 0.3461, lr_0 = 4.7474e-04
Loss = 1.7740e-02, PNorm = 154.6207, GNorm = 0.8579, lr_0 = 4.7442e-04
Loss = 1.5572e-02, PNorm = 154.6396, GNorm = 0.5157, lr_0 = 4.7409e-04
Loss = 1.0283e-02, PNorm = 154.6580, GNorm = 0.2956, lr_0 = 4.7377e-04
Loss = 1.0921e-02, PNorm = 154.6799, GNorm = 0.1944, lr_0 = 4.7344e-04
Loss = 1.2195e-02, PNorm = 154.7025, GNorm = 0.3373, lr_0 = 4.7312e-04
Loss = 1.1708e-02, PNorm = 154.7285, GNorm = 0.4150, lr_0 = 4.7279e-04
Loss = 1.1785e-02, PNorm = 154.7544, GNorm = 0.1825, lr_0 = 4.7247e-04
Loss = 1.2517e-02, PNorm = 154.7715, GNorm = 0.4394, lr_0 = 4.7215e-04
Loss = 1.3024e-02, PNorm = 154.7961, GNorm = 0.3883, lr_0 = 4.7182e-04
Loss = 1.3861e-02, PNorm = 154.8240, GNorm = 0.1977, lr_0 = 4.7150e-04
Loss = 1.2285e-02, PNorm = 154.8459, GNorm = 0.6705, lr_0 = 4.7118e-04
Loss = 1.3030e-02, PNorm = 154.8712, GNorm = 0.2895, lr_0 = 4.7085e-04
Loss = 1.4378e-02, PNorm = 154.8935, GNorm = 0.3177, lr_0 = 4.7053e-04
Loss = 1.1280e-02, PNorm = 154.9216, GNorm = 0.1506, lr_0 = 4.7021e-04
Loss = 1.2278e-02, PNorm = 154.9466, GNorm = 0.3761, lr_0 = 4.6989e-04
Loss = 1.2620e-02, PNorm = 154.9706, GNorm = 0.1592, lr_0 = 4.6957e-04
Loss = 1.3929e-02, PNorm = 154.9942, GNorm = 0.4867, lr_0 = 4.6924e-04
Loss = 1.3803e-02, PNorm = 155.0180, GNorm = 0.3093, lr_0 = 4.6892e-04
Loss = 1.1883e-02, PNorm = 155.0405, GNorm = 0.2221, lr_0 = 4.6860e-04
Loss = 1.1355e-02, PNorm = 155.0656, GNorm = 0.1284, lr_0 = 4.6828e-04
Loss = 1.1427e-02, PNorm = 155.0915, GNorm = 0.1949, lr_0 = 4.6796e-04
Loss = 1.1138e-02, PNorm = 155.1165, GNorm = 0.3642, lr_0 = 4.6764e-04
Loss = 1.2945e-02, PNorm = 155.1416, GNorm = 0.3529, lr_0 = 4.6732e-04
Loss = 1.0982e-02, PNorm = 155.1623, GNorm = 0.1510, lr_0 = 4.6700e-04
Loss = 1.2809e-02, PNorm = 155.1847, GNorm = 0.1996, lr_0 = 4.6668e-04
Loss = 1.3338e-02, PNorm = 155.2073, GNorm = 0.1825, lr_0 = 4.6636e-04
Loss = 1.1724e-02, PNorm = 155.2330, GNorm = 0.2216, lr_0 = 4.6604e-04
Loss = 1.2534e-02, PNorm = 155.2574, GNorm = 0.6291, lr_0 = 4.6572e-04
Loss = 1.4005e-02, PNorm = 155.2824, GNorm = 0.6387, lr_0 = 4.6540e-04
Loss = 1.1890e-02, PNorm = 155.3132, GNorm = 0.4058, lr_0 = 4.6508e-04
Loss = 1.1364e-02, PNorm = 155.3381, GNorm = 0.5035, lr_0 = 4.6476e-04
Loss = 1.3759e-02, PNorm = 155.3632, GNorm = 0.3759, lr_0 = 4.6445e-04
Loss = 1.1119e-02, PNorm = 155.3902, GNorm = 0.2008, lr_0 = 4.6413e-04
Loss = 1.1266e-02, PNorm = 155.4094, GNorm = 0.5216, lr_0 = 4.6381e-04
Loss = 1.5055e-02, PNorm = 155.4326, GNorm = 0.3503, lr_0 = 4.6349e-04
Loss = 1.1306e-02, PNorm = 155.4569, GNorm = 0.3845, lr_0 = 4.6317e-04
Loss = 1.0404e-02, PNorm = 155.4846, GNorm = 0.1297, lr_0 = 4.6286e-04
Loss = 1.4074e-02, PNorm = 155.5097, GNorm = 0.6536, lr_0 = 4.6254e-04
Loss = 1.0895e-02, PNorm = 155.5374, GNorm = 0.2891, lr_0 = 4.6222e-04
Loss = 1.1875e-02, PNorm = 155.5595, GNorm = 0.3858, lr_0 = 4.6191e-04
Loss = 1.3897e-02, PNorm = 155.5844, GNorm = 0.3911, lr_0 = 4.6159e-04
Loss = 1.2310e-02, PNorm = 155.6108, GNorm = 0.3892, lr_0 = 4.6127e-04
Loss = 1.1984e-02, PNorm = 155.6398, GNorm = 0.3168, lr_0 = 4.6096e-04
Loss = 1.2110e-02, PNorm = 155.6713, GNorm = 0.3204, lr_0 = 4.6064e-04
Loss = 1.3948e-02, PNorm = 155.6978, GNorm = 0.5953, lr_0 = 4.6033e-04
Loss = 9.8974e-03, PNorm = 155.7260, GNorm = 0.2135, lr_0 = 4.6001e-04
Loss = 9.8971e-03, PNorm = 155.7461, GNorm = 0.3780, lr_0 = 4.5970e-04
Loss = 1.4025e-02, PNorm = 155.7695, GNorm = 0.4038, lr_0 = 4.5938e-04
Loss = 1.2522e-02, PNorm = 155.7932, GNorm = 0.3044, lr_0 = 4.5907e-04
Loss = 1.0232e-02, PNorm = 155.8181, GNorm = 0.1967, lr_0 = 4.5875e-04
Loss = 9.6043e-03, PNorm = 155.8458, GNorm = 0.3258, lr_0 = 4.5844e-04
Loss = 1.2680e-02, PNorm = 155.8738, GNorm = 0.6146, lr_0 = 4.5812e-04
Loss = 1.2057e-02, PNorm = 155.8995, GNorm = 0.5294, lr_0 = 4.5781e-04
Loss = 1.3231e-02, PNorm = 155.9213, GNorm = 0.5326, lr_0 = 4.5750e-04
Loss = 1.0560e-02, PNorm = 155.9427, GNorm = 0.2518, lr_0 = 4.5718e-04
Loss = 1.1840e-02, PNorm = 155.9644, GNorm = 0.4130, lr_0 = 4.5687e-04
Loss = 1.3569e-02, PNorm = 155.9917, GNorm = 0.1536, lr_0 = 4.5656e-04
Loss = 1.5955e-02, PNorm = 156.0237, GNorm = 0.4101, lr_0 = 4.5624e-04
Loss = 1.2584e-02, PNorm = 156.0537, GNorm = 0.2529, lr_0 = 4.5593e-04
Loss = 1.1926e-02, PNorm = 156.0819, GNorm = 0.1631, lr_0 = 4.5562e-04
Loss = 1.1215e-02, PNorm = 156.1109, GNorm = 0.1747, lr_0 = 4.5531e-04
Loss = 1.1801e-02, PNorm = 156.1409, GNorm = 0.4215, lr_0 = 4.5499e-04
Loss = 1.3879e-02, PNorm = 156.1688, GNorm = 0.6279, lr_0 = 4.5468e-04
Loss = 1.2878e-02, PNorm = 156.1940, GNorm = 0.7789, lr_0 = 4.5437e-04
Loss = 1.2027e-02, PNorm = 156.2202, GNorm = 0.1515, lr_0 = 4.5406e-04
Loss = 1.2129e-02, PNorm = 156.2472, GNorm = 0.6861, lr_0 = 4.5375e-04
Loss = 1.1285e-02, PNorm = 156.2754, GNorm = 0.1269, lr_0 = 4.5344e-04
Loss = 1.2110e-02, PNorm = 156.3064, GNorm = 0.2733, lr_0 = 4.5313e-04
Loss = 1.8423e-02, PNorm = 156.3341, GNorm = 0.4265, lr_0 = 4.5282e-04
Loss = 1.3467e-02, PNorm = 156.3615, GNorm = 0.3528, lr_0 = 4.5251e-04
Loss = 1.1914e-02, PNorm = 156.3889, GNorm = 0.2451, lr_0 = 4.5220e-04
Loss = 1.1607e-02, PNorm = 156.4205, GNorm = 0.5348, lr_0 = 4.5189e-04
Loss = 1.1494e-02, PNorm = 156.4520, GNorm = 0.1953, lr_0 = 4.5158e-04
Loss = 1.2564e-02, PNorm = 156.4785, GNorm = 0.6286, lr_0 = 4.5127e-04
Loss = 1.2208e-02, PNorm = 156.5064, GNorm = 0.2645, lr_0 = 4.5096e-04
Loss = 1.6287e-02, PNorm = 156.5337, GNorm = 1.1479, lr_0 = 4.5065e-04
Loss = 1.7162e-02, PNorm = 156.5576, GNorm = 0.3368, lr_0 = 4.5034e-04
Loss = 1.3255e-02, PNorm = 156.5886, GNorm = 0.1546, lr_0 = 4.5003e-04
Loss = 1.1384e-02, PNorm = 156.6170, GNorm = 0.4390, lr_0 = 4.4972e-04
Loss = 1.2493e-02, PNorm = 156.6464, GNorm = 0.5617, lr_0 = 4.4942e-04
Loss = 1.1872e-02, PNorm = 156.6795, GNorm = 0.3425, lr_0 = 4.4911e-04
Loss = 1.3391e-02, PNorm = 156.7123, GNorm = 0.3152, lr_0 = 4.4880e-04
Loss = 1.2742e-02, PNorm = 156.7415, GNorm = 0.2867, lr_0 = 4.4849e-04
Loss = 1.1300e-02, PNorm = 156.7700, GNorm = 0.1037, lr_0 = 4.4819e-04
Loss = 1.0003e-02, PNorm = 156.7943, GNorm = 0.3325, lr_0 = 4.4788e-04
Loss = 1.1436e-02, PNorm = 156.8186, GNorm = 0.2574, lr_0 = 4.4757e-04
Loss = 1.1452e-02, PNorm = 156.8453, GNorm = 0.1750, lr_0 = 4.4727e-04
Loss = 1.0858e-02, PNorm = 156.8722, GNorm = 0.5991, lr_0 = 4.4696e-04
Loss = 1.0687e-02, PNorm = 156.8985, GNorm = 0.3759, lr_0 = 4.4665e-04
Loss = 1.2066e-02, PNorm = 156.9289, GNorm = 0.2987, lr_0 = 4.4635e-04
Loss = 1.2376e-02, PNorm = 156.9583, GNorm = 0.3335, lr_0 = 4.4604e-04
Loss = 1.0524e-02, PNorm = 156.9895, GNorm = 0.2566, lr_0 = 4.4574e-04
Loss = 1.5114e-02, PNorm = 157.0179, GNorm = 0.2601, lr_0 = 4.4543e-04
Loss = 1.2527e-02, PNorm = 157.0491, GNorm = 0.3445, lr_0 = 4.4513e-04
Loss = 1.3967e-02, PNorm = 157.0785, GNorm = 0.1243, lr_0 = 4.4482e-04
Loss = 1.0616e-02, PNorm = 157.1050, GNorm = 0.2182, lr_0 = 4.4452e-04
Loss = 1.1183e-02, PNorm = 157.1337, GNorm = 0.4655, lr_0 = 4.4421e-04
Loss = 1.1135e-02, PNorm = 157.1614, GNorm = 0.5332, lr_0 = 4.4391e-04
Loss = 1.1490e-02, PNorm = 157.1920, GNorm = 0.3118, lr_0 = 4.4360e-04
Loss = 9.6060e-03, PNorm = 157.2158, GNorm = 0.5137, lr_0 = 4.4330e-04
Loss = 1.2202e-02, PNorm = 157.2454, GNorm = 0.4651, lr_0 = 4.4299e-04
Loss = 1.0046e-02, PNorm = 157.2708, GNorm = 0.2400, lr_0 = 4.4269e-04
Loss = 1.2187e-02, PNorm = 157.3013, GNorm = 0.4055, lr_0 = 4.4239e-04
Loss = 1.2323e-02, PNorm = 157.3317, GNorm = 0.1437, lr_0 = 4.4209e-04
Loss = 9.7370e-03, PNorm = 157.3591, GNorm = 0.1294, lr_0 = 4.4178e-04
Loss = 1.2889e-02, PNorm = 157.3859, GNorm = 0.3932, lr_0 = 4.4148e-04
Loss = 1.0730e-02, PNorm = 157.4165, GNorm = 0.1844, lr_0 = 4.4118e-04
Loss = 1.1113e-02, PNorm = 157.4425, GNorm = 0.2821, lr_0 = 4.4088e-04
Loss = 1.1588e-02, PNorm = 157.4723, GNorm = 0.5733, lr_0 = 4.4057e-04
Loss = 9.8632e-03, PNorm = 157.4949, GNorm = 0.3126, lr_0 = 4.4027e-04
Loss = 1.1518e-02, PNorm = 157.5223, GNorm = 0.3834, lr_0 = 4.3997e-04
Loss = 1.2079e-02, PNorm = 157.5463, GNorm = 0.2086, lr_0 = 4.3967e-04
Loss = 1.0024e-02, PNorm = 157.5718, GNorm = 0.3631, lr_0 = 4.3937e-04
Validation mae = 0.281694
Epoch 12
Loss = 1.1900e-02, PNorm = 157.5888, GNorm = 0.8951, lr_0 = 4.3907e-04
Loss = 1.0610e-02, PNorm = 157.6061, GNorm = 0.2335, lr_0 = 4.3877e-04
Loss = 1.1122e-02, PNorm = 157.6245, GNorm = 0.1014, lr_0 = 4.3846e-04
Loss = 1.0118e-02, PNorm = 157.6419, GNorm = 0.2163, lr_0 = 4.3816e-04
Loss = 1.0387e-02, PNorm = 157.6617, GNorm = 0.3705, lr_0 = 4.3786e-04
Loss = 9.5460e-03, PNorm = 157.6790, GNorm = 0.1951, lr_0 = 4.3756e-04
Loss = 9.9336e-03, PNorm = 157.7020, GNorm = 0.1557, lr_0 = 4.3726e-04
Loss = 1.1592e-02, PNorm = 157.7240, GNorm = 0.2078, lr_0 = 4.3696e-04
Loss = 1.0806e-02, PNorm = 157.7418, GNorm = 0.1356, lr_0 = 4.3667e-04
Loss = 9.2037e-03, PNorm = 157.7608, GNorm = 0.4814, lr_0 = 4.3637e-04
Loss = 1.0398e-02, PNorm = 157.7798, GNorm = 0.4386, lr_0 = 4.3607e-04
Loss = 1.0508e-02, PNorm = 157.8016, GNorm = 0.2788, lr_0 = 4.3577e-04
Loss = 7.9491e-03, PNorm = 157.8226, GNorm = 0.1336, lr_0 = 4.3547e-04
Loss = 9.4296e-03, PNorm = 157.8421, GNorm = 0.3988, lr_0 = 4.3517e-04
Loss = 1.4082e-02, PNorm = 157.8604, GNorm = 0.2015, lr_0 = 4.3487e-04
Loss = 9.6460e-03, PNorm = 157.8804, GNorm = 0.1676, lr_0 = 4.3458e-04
Loss = 8.7476e-03, PNorm = 157.9025, GNorm = 0.1177, lr_0 = 4.3428e-04
Loss = 9.9640e-03, PNorm = 157.9196, GNorm = 0.2572, lr_0 = 4.3398e-04
Loss = 1.1683e-02, PNorm = 157.9361, GNorm = 0.1713, lr_0 = 4.3368e-04
Loss = 9.1556e-03, PNorm = 157.9575, GNorm = 0.4158, lr_0 = 4.3339e-04
Loss = 9.3889e-03, PNorm = 157.9803, GNorm = 0.1171, lr_0 = 4.3309e-04
Loss = 9.2704e-03, PNorm = 158.0028, GNorm = 0.2689, lr_0 = 4.3279e-04
Loss = 7.4754e-03, PNorm = 158.0277, GNorm = 0.1303, lr_0 = 4.3250e-04
Loss = 1.0709e-02, PNorm = 158.0453, GNorm = 0.3247, lr_0 = 4.3220e-04
Loss = 9.7710e-03, PNorm = 158.0623, GNorm = 0.2905, lr_0 = 4.3190e-04
Loss = 8.1536e-03, PNorm = 158.0774, GNorm = 0.2286, lr_0 = 4.3161e-04
Loss = 7.4953e-03, PNorm = 158.0948, GNorm = 0.3357, lr_0 = 4.3131e-04
Loss = 1.0146e-02, PNorm = 158.1112, GNorm = 0.1682, lr_0 = 4.3102e-04
Loss = 9.7542e-03, PNorm = 158.1329, GNorm = 0.1880, lr_0 = 4.3072e-04
Loss = 1.2063e-02, PNorm = 158.1515, GNorm = 0.2503, lr_0 = 4.3043e-04
Loss = 8.7650e-03, PNorm = 158.1710, GNorm = 0.2603, lr_0 = 4.3013e-04
Loss = 1.1087e-02, PNorm = 158.1908, GNorm = 0.3229, lr_0 = 4.2984e-04
Loss = 1.0883e-02, PNorm = 158.2098, GNorm = 0.2570, lr_0 = 4.2954e-04
Loss = 1.0033e-02, PNorm = 158.2299, GNorm = 0.2735, lr_0 = 4.2925e-04
Loss = 9.7491e-03, PNorm = 158.2467, GNorm = 0.1333, lr_0 = 4.2895e-04
Loss = 9.5401e-03, PNorm = 158.2666, GNorm = 0.1961, lr_0 = 4.2866e-04
Loss = 1.0798e-02, PNorm = 158.2883, GNorm = 0.1256, lr_0 = 4.2837e-04
Loss = 8.7912e-03, PNorm = 158.3093, GNorm = 0.2209, lr_0 = 4.2807e-04
Loss = 1.0938e-02, PNorm = 158.3302, GNorm = 0.1885, lr_0 = 4.2778e-04
Loss = 8.5132e-03, PNorm = 158.3450, GNorm = 0.1836, lr_0 = 4.2749e-04
Loss = 1.3301e-02, PNorm = 158.3621, GNorm = 0.2071, lr_0 = 4.2719e-04
Loss = 1.0188e-02, PNorm = 158.3839, GNorm = 0.2054, lr_0 = 4.2690e-04
Loss = 8.7835e-03, PNorm = 158.4025, GNorm = 0.2530, lr_0 = 4.2661e-04
Loss = 8.1173e-03, PNorm = 158.4207, GNorm = 0.1641, lr_0 = 4.2632e-04
Loss = 1.0091e-02, PNorm = 158.4374, GNorm = 0.1740, lr_0 = 4.2602e-04
Loss = 9.5231e-03, PNorm = 158.4581, GNorm = 0.1246, lr_0 = 4.2573e-04
Loss = 9.9799e-03, PNorm = 158.4794, GNorm = 0.1441, lr_0 = 4.2544e-04
Loss = 9.3226e-03, PNorm = 158.5026, GNorm = 0.3527, lr_0 = 4.2515e-04
Loss = 9.3543e-03, PNorm = 158.5274, GNorm = 0.0844, lr_0 = 4.2486e-04
Loss = 9.6526e-03, PNorm = 158.5463, GNorm = 0.4597, lr_0 = 4.2457e-04
Loss = 8.8199e-03, PNorm = 158.5661, GNorm = 0.1729, lr_0 = 4.2428e-04
Loss = 9.8026e-03, PNorm = 158.5861, GNorm = 0.3895, lr_0 = 4.2399e-04
Loss = 1.1638e-02, PNorm = 158.6016, GNorm = 0.2873, lr_0 = 4.2370e-04
Loss = 8.4899e-03, PNorm = 158.6252, GNorm = 0.2447, lr_0 = 4.2340e-04
Loss = 1.0582e-02, PNorm = 158.6489, GNorm = 0.2869, lr_0 = 4.2311e-04
Loss = 9.8523e-03, PNorm = 158.6704, GNorm = 0.1394, lr_0 = 4.2283e-04
Loss = 1.0285e-02, PNorm = 158.6944, GNorm = 0.4119, lr_0 = 4.2254e-04
Loss = 9.4926e-03, PNorm = 158.7164, GNorm = 0.1331, lr_0 = 4.2225e-04
Loss = 9.7723e-03, PNorm = 158.7381, GNorm = 0.3097, lr_0 = 4.2196e-04
Loss = 1.0119e-02, PNorm = 158.7606, GNorm = 0.2883, lr_0 = 4.2167e-04
Loss = 1.0615e-02, PNorm = 158.7780, GNorm = 0.2670, lr_0 = 4.2138e-04
Loss = 1.4250e-02, PNorm = 158.7977, GNorm = 0.4306, lr_0 = 4.2109e-04
Loss = 8.8207e-03, PNorm = 158.8192, GNorm = 0.3665, lr_0 = 4.2080e-04
Loss = 8.9305e-03, PNorm = 158.8417, GNorm = 0.3506, lr_0 = 4.2051e-04
Loss = 1.1305e-02, PNorm = 158.8669, GNorm = 0.4720, lr_0 = 4.2023e-04
Loss = 1.0751e-02, PNorm = 158.8920, GNorm = 0.3703, lr_0 = 4.1994e-04
Loss = 9.0418e-03, PNorm = 158.9192, GNorm = 0.1241, lr_0 = 4.1965e-04
Loss = 1.1302e-02, PNorm = 158.9463, GNorm = 0.2514, lr_0 = 4.1936e-04
Loss = 1.0957e-02, PNorm = 158.9699, GNorm = 0.2156, lr_0 = 4.1907e-04
Loss = 9.3374e-03, PNorm = 158.9915, GNorm = 0.2411, lr_0 = 4.1879e-04
Loss = 9.6875e-03, PNorm = 159.0176, GNorm = 0.3110, lr_0 = 4.1850e-04
Loss = 9.6193e-03, PNorm = 159.0407, GNorm = 0.7167, lr_0 = 4.1821e-04
Loss = 1.1733e-02, PNorm = 159.0667, GNorm = 1.1977, lr_0 = 4.1793e-04
Loss = 1.1324e-02, PNorm = 159.0912, GNorm = 0.2403, lr_0 = 4.1764e-04
Loss = 7.8043e-03, PNorm = 159.1177, GNorm = 0.4463, lr_0 = 4.1736e-04
Loss = 1.2562e-02, PNorm = 159.1405, GNorm = 0.3149, lr_0 = 4.1707e-04
Loss = 8.7190e-03, PNorm = 159.1604, GNorm = 0.1232, lr_0 = 4.1678e-04
Loss = 9.4113e-03, PNorm = 159.1825, GNorm = 0.2461, lr_0 = 4.1650e-04
Loss = 1.3384e-02, PNorm = 159.2077, GNorm = 0.2192, lr_0 = 4.1621e-04
Loss = 8.9111e-03, PNorm = 159.2298, GNorm = 0.3892, lr_0 = 4.1593e-04
Loss = 9.5203e-03, PNorm = 159.2519, GNorm = 0.2703, lr_0 = 4.1564e-04
Loss = 1.1177e-02, PNorm = 159.2764, GNorm = 0.2079, lr_0 = 4.1536e-04
Loss = 8.7590e-03, PNorm = 159.3039, GNorm = 0.2649, lr_0 = 4.1507e-04
Loss = 1.1598e-02, PNorm = 159.3259, GNorm = 0.2286, lr_0 = 4.1479e-04
Loss = 1.1155e-02, PNorm = 159.3540, GNorm = 0.2136, lr_0 = 4.1450e-04
Loss = 1.1829e-02, PNorm = 159.3805, GNorm = 0.3415, lr_0 = 4.1422e-04
Loss = 1.1974e-02, PNorm = 159.4029, GNorm = 0.4046, lr_0 = 4.1394e-04
Loss = 1.2236e-02, PNorm = 159.4272, GNorm = 0.1652, lr_0 = 4.1365e-04
Loss = 1.1406e-02, PNorm = 159.4510, GNorm = 0.3534, lr_0 = 4.1337e-04
Loss = 9.8885e-03, PNorm = 159.4745, GNorm = 0.0898, lr_0 = 4.1309e-04
Loss = 1.1437e-02, PNorm = 159.4993, GNorm = 0.2128, lr_0 = 4.1280e-04
Loss = 1.0538e-02, PNorm = 159.5267, GNorm = 0.2235, lr_0 = 4.1252e-04
Loss = 9.7142e-03, PNorm = 159.5488, GNorm = 0.2316, lr_0 = 4.1224e-04
Loss = 8.1697e-03, PNorm = 159.5723, GNorm = 0.3368, lr_0 = 4.1196e-04
Loss = 1.0378e-02, PNorm = 159.5951, GNorm = 0.1591, lr_0 = 4.1167e-04
Loss = 1.1440e-02, PNorm = 159.6204, GNorm = 0.5116, lr_0 = 4.1139e-04
Loss = 9.5662e-03, PNorm = 159.6446, GNorm = 0.3748, lr_0 = 4.1111e-04
Loss = 1.0310e-02, PNorm = 159.6655, GNorm = 0.1825, lr_0 = 4.1083e-04
Loss = 9.5221e-03, PNorm = 159.6863, GNorm = 0.4354, lr_0 = 4.1055e-04
Loss = 1.0305e-02, PNorm = 159.7062, GNorm = 0.3026, lr_0 = 4.1027e-04
Loss = 1.1398e-02, PNorm = 159.7293, GNorm = 0.2850, lr_0 = 4.0998e-04
Loss = 9.7902e-03, PNorm = 159.7505, GNorm = 0.5899, lr_0 = 4.0970e-04
Loss = 1.1154e-02, PNorm = 159.7730, GNorm = 0.3442, lr_0 = 4.0942e-04
Loss = 1.0140e-02, PNorm = 159.7991, GNorm = 0.3077, lr_0 = 4.0914e-04
Loss = 1.0385e-02, PNorm = 159.8244, GNorm = 0.3772, lr_0 = 4.0886e-04
Loss = 8.6786e-03, PNorm = 159.8471, GNorm = 0.1488, lr_0 = 4.0858e-04
Loss = 1.3467e-02, PNorm = 159.8702, GNorm = 0.1769, lr_0 = 4.0830e-04
Loss = 1.0901e-02, PNorm = 159.8942, GNorm = 0.7545, lr_0 = 4.0802e-04
Loss = 9.9561e-03, PNorm = 159.9164, GNorm = 0.3885, lr_0 = 4.0774e-04
Loss = 1.2381e-02, PNorm = 159.9434, GNorm = 0.9261, lr_0 = 4.0746e-04
Loss = 1.1546e-02, PNorm = 159.9675, GNorm = 0.3910, lr_0 = 4.0718e-04
Loss = 8.9980e-03, PNorm = 159.9905, GNorm = 0.6817, lr_0 = 4.0691e-04
Loss = 1.0007e-02, PNorm = 160.0122, GNorm = 0.4350, lr_0 = 4.0663e-04
Loss = 1.1254e-02, PNorm = 160.0362, GNorm = 0.4616, lr_0 = 4.0635e-04
Loss = 1.1919e-02, PNorm = 160.0588, GNorm = 1.3254, lr_0 = 4.0607e-04
Loss = 1.1498e-02, PNorm = 160.0836, GNorm = 0.3545, lr_0 = 4.0579e-04
Loss = 9.4937e-03, PNorm = 160.1120, GNorm = 0.1643, lr_0 = 4.0551e-04
Loss = 1.1513e-02, PNorm = 160.1369, GNorm = 0.5865, lr_0 = 4.0524e-04
Loss = 1.1282e-02, PNorm = 160.1595, GNorm = 0.4758, lr_0 = 4.0496e-04
Loss = 1.0765e-02, PNorm = 160.1807, GNorm = 0.2846, lr_0 = 4.0468e-04
Validation mae = 0.280285
Epoch 13
Loss = 9.3540e-03, PNorm = 160.1982, GNorm = 0.1984, lr_0 = 4.0440e-04
Loss = 9.8745e-03, PNorm = 160.2146, GNorm = 0.4434, lr_0 = 4.0413e-04
Loss = 1.1466e-02, PNorm = 160.2312, GNorm = 0.1216, lr_0 = 4.0385e-04
Loss = 8.7273e-03, PNorm = 160.2489, GNorm = 0.3943, lr_0 = 4.0357e-04
Loss = 8.6841e-03, PNorm = 160.2640, GNorm = 0.3487, lr_0 = 4.0330e-04
Loss = 9.9347e-03, PNorm = 160.2796, GNorm = 0.4095, lr_0 = 4.0302e-04
Loss = 8.3752e-03, PNorm = 160.2987, GNorm = 0.1746, lr_0 = 4.0274e-04
Loss = 9.0905e-03, PNorm = 160.3196, GNorm = 0.2020, lr_0 = 4.0247e-04
Loss = 8.2221e-03, PNorm = 160.3369, GNorm = 0.3843, lr_0 = 4.0219e-04
Loss = 8.8340e-03, PNorm = 160.3506, GNorm = 0.3064, lr_0 = 4.0192e-04
Loss = 8.3612e-03, PNorm = 160.3669, GNorm = 0.1317, lr_0 = 4.0164e-04
Loss = 1.1391e-02, PNorm = 160.3890, GNorm = 0.2300, lr_0 = 4.0137e-04
Loss = 1.0349e-02, PNorm = 160.4070, GNorm = 0.1292, lr_0 = 4.0109e-04
Loss = 8.3695e-03, PNorm = 160.4240, GNorm = 0.1714, lr_0 = 4.0082e-04
Loss = 8.1009e-03, PNorm = 160.4389, GNorm = 0.3344, lr_0 = 4.0054e-04
Loss = 8.1590e-03, PNorm = 160.4549, GNorm = 0.3463, lr_0 = 4.0027e-04
Loss = 8.6980e-03, PNorm = 160.4688, GNorm = 0.3391, lr_0 = 3.9999e-04
Loss = 7.0048e-03, PNorm = 160.4832, GNorm = 0.3866, lr_0 = 3.9972e-04
Loss = 8.1929e-03, PNorm = 160.5004, GNorm = 0.3891, lr_0 = 3.9945e-04
Loss = 9.3568e-03, PNorm = 160.5191, GNorm = 0.3351, lr_0 = 3.9917e-04
Loss = 9.4130e-03, PNorm = 160.5350, GNorm = 0.1563, lr_0 = 3.9890e-04
Loss = 7.7603e-03, PNorm = 160.5540, GNorm = 0.4392, lr_0 = 3.9863e-04
Loss = 8.6313e-03, PNorm = 160.5693, GNorm = 0.2865, lr_0 = 3.9835e-04
Loss = 7.5017e-03, PNorm = 160.5886, GNorm = 0.2696, lr_0 = 3.9808e-04
Loss = 8.7768e-03, PNorm = 160.6019, GNorm = 0.2499, lr_0 = 3.9781e-04
Loss = 8.7054e-03, PNorm = 160.6195, GNorm = 0.1756, lr_0 = 3.9753e-04
Loss = 8.3711e-03, PNorm = 160.6342, GNorm = 0.2985, lr_0 = 3.9726e-04
Loss = 9.2689e-03, PNorm = 160.6497, GNorm = 0.6095, lr_0 = 3.9699e-04
Loss = 8.0225e-03, PNorm = 160.6662, GNorm = 0.1363, lr_0 = 3.9672e-04
Loss = 9.2891e-03, PNorm = 160.6824, GNorm = 0.3057, lr_0 = 3.9645e-04
Loss = 1.0688e-02, PNorm = 160.7021, GNorm = 0.3103, lr_0 = 3.9617e-04
Loss = 7.6555e-03, PNorm = 160.7180, GNorm = 0.3938, lr_0 = 3.9590e-04
Loss = 9.2255e-03, PNorm = 160.7396, GNorm = 0.1969, lr_0 = 3.9563e-04
Loss = 8.0846e-03, PNorm = 160.7577, GNorm = 0.5335, lr_0 = 3.9536e-04
Loss = 1.1501e-02, PNorm = 160.7754, GNorm = 0.1689, lr_0 = 3.9509e-04
Loss = 7.0714e-03, PNorm = 160.7912, GNorm = 0.2561, lr_0 = 3.9482e-04
Loss = 7.2157e-03, PNorm = 160.8034, GNorm = 0.1638, lr_0 = 3.9455e-04
Loss = 8.4805e-03, PNorm = 160.8156, GNorm = 0.1196, lr_0 = 3.9428e-04
Loss = 7.2893e-03, PNorm = 160.8303, GNorm = 0.1143, lr_0 = 3.9401e-04
Loss = 9.1833e-03, PNorm = 160.8488, GNorm = 0.3875, lr_0 = 3.9374e-04
Loss = 8.9328e-03, PNorm = 160.8700, GNorm = 0.2794, lr_0 = 3.9347e-04
Loss = 8.6758e-03, PNorm = 160.8895, GNorm = 0.2023, lr_0 = 3.9320e-04
Loss = 9.2614e-03, PNorm = 160.9070, GNorm = 0.3012, lr_0 = 3.9293e-04
Loss = 6.6495e-03, PNorm = 160.9269, GNorm = 0.1871, lr_0 = 3.9266e-04
Loss = 7.9135e-03, PNorm = 160.9456, GNorm = 0.1783, lr_0 = 3.9239e-04
Loss = 8.8342e-03, PNorm = 160.9636, GNorm = 0.2033, lr_0 = 3.9212e-04
Loss = 1.0353e-02, PNorm = 160.9802, GNorm = 0.2641, lr_0 = 3.9185e-04
Loss = 1.0279e-02, PNorm = 160.9941, GNorm = 0.4629, lr_0 = 3.9159e-04
Loss = 8.3252e-03, PNorm = 161.0124, GNorm = 0.1755, lr_0 = 3.9132e-04
Loss = 8.4822e-03, PNorm = 161.0297, GNorm = 0.3172, lr_0 = 3.9105e-04
Loss = 7.8545e-03, PNorm = 161.0481, GNorm = 0.2154, lr_0 = 3.9078e-04
Loss = 7.4318e-03, PNorm = 161.0657, GNorm = 0.2781, lr_0 = 3.9051e-04
Loss = 7.1209e-03, PNorm = 161.0851, GNorm = 0.2789, lr_0 = 3.9025e-04
Loss = 1.3248e-02, PNorm = 161.1042, GNorm = 0.2078, lr_0 = 3.8998e-04
Loss = 7.7003e-03, PNorm = 161.1195, GNorm = 0.2901, lr_0 = 3.8971e-04
Loss = 7.6986e-03, PNorm = 161.1361, GNorm = 0.2756, lr_0 = 3.8945e-04
Loss = 7.0108e-03, PNorm = 161.1529, GNorm = 0.1215, lr_0 = 3.8918e-04
Loss = 6.4171e-03, PNorm = 161.1696, GNorm = 0.1066, lr_0 = 3.8891e-04
Loss = 7.1476e-03, PNorm = 161.1896, GNorm = 0.1204, lr_0 = 3.8865e-04
Loss = 7.9558e-03, PNorm = 161.2059, GNorm = 0.4916, lr_0 = 3.8838e-04
Loss = 9.0275e-03, PNorm = 161.2291, GNorm = 0.4395, lr_0 = 3.8811e-04
Loss = 7.5851e-03, PNorm = 161.2517, GNorm = 0.3730, lr_0 = 3.8785e-04
Loss = 8.4053e-03, PNorm = 161.2694, GNorm = 0.3382, lr_0 = 3.8758e-04
Loss = 6.7750e-03, PNorm = 161.2854, GNorm = 0.1199, lr_0 = 3.8732e-04
Loss = 8.2811e-03, PNorm = 161.3033, GNorm = 0.3129, lr_0 = 3.8705e-04
Loss = 7.8842e-03, PNorm = 161.3193, GNorm = 0.2056, lr_0 = 3.8679e-04
Loss = 9.5051e-03, PNorm = 161.3390, GNorm = 0.2290, lr_0 = 3.8652e-04
Loss = 9.4384e-03, PNorm = 161.3557, GNorm = 0.1117, lr_0 = 3.8626e-04
Loss = 8.8811e-03, PNorm = 161.3750, GNorm = 0.2124, lr_0 = 3.8599e-04
Loss = 9.3397e-03, PNorm = 161.3937, GNorm = 0.3860, lr_0 = 3.8573e-04
Loss = 8.0299e-03, PNorm = 161.4149, GNorm = 0.2810, lr_0 = 3.8546e-04
Loss = 1.0179e-02, PNorm = 161.4352, GNorm = 0.1393, lr_0 = 3.8520e-04
Loss = 8.7611e-03, PNorm = 161.4501, GNorm = 0.2303, lr_0 = 3.8493e-04
Loss = 1.3318e-02, PNorm = 161.4634, GNorm = 0.3437, lr_0 = 3.8467e-04
Loss = 8.8210e-03, PNorm = 161.4854, GNorm = 0.3484, lr_0 = 3.8441e-04
Loss = 8.1393e-03, PNorm = 161.5054, GNorm = 0.3474, lr_0 = 3.8414e-04
Loss = 8.0199e-03, PNorm = 161.5233, GNorm = 0.4090, lr_0 = 3.8388e-04
Loss = 8.9028e-03, PNorm = 161.5410, GNorm = 0.1623, lr_0 = 3.8362e-04
Loss = 1.0239e-02, PNorm = 161.5588, GNorm = 0.1993, lr_0 = 3.8336e-04
Loss = 8.0810e-03, PNorm = 161.5768, GNorm = 0.3176, lr_0 = 3.8309e-04
Loss = 9.5114e-03, PNorm = 161.5946, GNorm = 0.1922, lr_0 = 3.8283e-04
Loss = 6.4183e-03, PNorm = 161.6148, GNorm = 0.3445, lr_0 = 3.8257e-04
Loss = 9.3986e-03, PNorm = 161.6361, GNorm = 0.1171, lr_0 = 3.8231e-04
Loss = 7.9013e-03, PNorm = 161.6517, GNorm = 0.2031, lr_0 = 3.8204e-04
Loss = 8.0224e-03, PNorm = 161.6714, GNorm = 0.3675, lr_0 = 3.8178e-04
Loss = 7.6543e-03, PNorm = 161.6898, GNorm = 0.2364, lr_0 = 3.8152e-04
Loss = 6.8807e-03, PNorm = 161.7060, GNorm = 0.2700, lr_0 = 3.8126e-04
Loss = 8.6135e-03, PNorm = 161.7221, GNorm = 0.1843, lr_0 = 3.8100e-04
Loss = 9.7590e-03, PNorm = 161.7414, GNorm = 0.2876, lr_0 = 3.8074e-04
Loss = 8.4839e-03, PNorm = 161.7622, GNorm = 0.1308, lr_0 = 3.8048e-04
Loss = 9.5730e-03, PNorm = 161.7790, GNorm = 0.3709, lr_0 = 3.8022e-04
Loss = 8.2056e-03, PNorm = 161.7950, GNorm = 0.2494, lr_0 = 3.7995e-04
Loss = 7.9870e-03, PNorm = 161.8109, GNorm = 0.2190, lr_0 = 3.7969e-04
Loss = 9.2464e-03, PNorm = 161.8316, GNorm = 0.3086, lr_0 = 3.7943e-04
Loss = 7.5931e-03, PNorm = 161.8502, GNorm = 0.3635, lr_0 = 3.7917e-04
Loss = 8.9325e-03, PNorm = 161.8682, GNorm = 0.6023, lr_0 = 3.7891e-04
Loss = 8.1037e-03, PNorm = 161.8872, GNorm = 0.1966, lr_0 = 3.7866e-04
Loss = 7.6133e-03, PNorm = 161.9053, GNorm = 0.4778, lr_0 = 3.7840e-04
Loss = 7.9662e-03, PNorm = 161.9267, GNorm = 0.1626, lr_0 = 3.7814e-04
Loss = 9.0043e-03, PNorm = 161.9452, GNorm = 0.2549, lr_0 = 3.7788e-04
Loss = 8.3369e-03, PNorm = 161.9636, GNorm = 0.1469, lr_0 = 3.7762e-04
Loss = 8.1988e-03, PNorm = 161.9842, GNorm = 0.2651, lr_0 = 3.7736e-04
Loss = 7.9670e-03, PNorm = 162.0027, GNorm = 0.1251, lr_0 = 3.7710e-04
Loss = 9.6544e-03, PNorm = 162.0213, GNorm = 0.2083, lr_0 = 3.7684e-04
Loss = 1.0378e-02, PNorm = 162.0422, GNorm = 0.6831, lr_0 = 3.7659e-04
Loss = 6.4362e-03, PNorm = 162.0636, GNorm = 0.4307, lr_0 = 3.7633e-04
Loss = 8.7485e-03, PNorm = 162.0825, GNorm = 0.1776, lr_0 = 3.7607e-04
Loss = 9.1364e-03, PNorm = 162.0983, GNorm = 0.3154, lr_0 = 3.7581e-04
Loss = 8.5963e-03, PNorm = 162.1188, GNorm = 0.4810, lr_0 = 3.7555e-04
Loss = 1.0488e-02, PNorm = 162.1388, GNorm = 0.1778, lr_0 = 3.7530e-04
Loss = 6.8923e-03, PNorm = 162.1559, GNorm = 0.1828, lr_0 = 3.7504e-04
Loss = 9.8154e-03, PNorm = 162.1697, GNorm = 0.2997, lr_0 = 3.7478e-04
Loss = 9.2669e-03, PNorm = 162.1896, GNorm = 0.1067, lr_0 = 3.7453e-04
Loss = 8.6705e-03, PNorm = 162.2133, GNorm = 0.1908, lr_0 = 3.7427e-04
Loss = 7.0138e-03, PNorm = 162.2324, GNorm = 0.3539, lr_0 = 3.7401e-04
Loss = 6.0911e-03, PNorm = 162.2484, GNorm = 0.0917, lr_0 = 3.7376e-04
Loss = 8.2405e-03, PNorm = 162.2657, GNorm = 0.1468, lr_0 = 3.7350e-04
Loss = 8.2257e-03, PNorm = 162.2844, GNorm = 0.2024, lr_0 = 3.7325e-04
Loss = 9.1985e-03, PNorm = 162.3050, GNorm = 0.0913, lr_0 = 3.7299e-04
Loss = 8.2550e-03, PNorm = 162.3222, GNorm = 0.1048, lr_0 = 3.7273e-04
Validation mae = 0.280414
Epoch 14
Loss = 6.5868e-03, PNorm = 162.3374, GNorm = 0.6224, lr_0 = 3.7248e-04
Loss = 7.0622e-03, PNorm = 162.3524, GNorm = 0.3045, lr_0 = 3.7222e-04
Loss = 8.4925e-03, PNorm = 162.3652, GNorm = 0.1256, lr_0 = 3.7197e-04
Loss = 7.7824e-03, PNorm = 162.3754, GNorm = 0.1826, lr_0 = 3.7171e-04
Loss = 6.5133e-03, PNorm = 162.3922, GNorm = 0.3468, lr_0 = 3.7146e-04
Loss = 7.4764e-03, PNorm = 162.4072, GNorm = 0.1374, lr_0 = 3.7120e-04
Loss = 6.6536e-03, PNorm = 162.4176, GNorm = 0.2722, lr_0 = 3.7095e-04
Loss = 7.4441e-03, PNorm = 162.4289, GNorm = 0.2883, lr_0 = 3.7070e-04
Loss = 6.4752e-03, PNorm = 162.4440, GNorm = 0.1330, lr_0 = 3.7044e-04
Loss = 8.9649e-03, PNorm = 162.4554, GNorm = 0.1472, lr_0 = 3.7019e-04
Loss = 6.5921e-03, PNorm = 162.4684, GNorm = 0.3350, lr_0 = 3.6993e-04
Loss = 7.5208e-03, PNorm = 162.4823, GNorm = 0.2083, lr_0 = 3.6968e-04
Loss = 8.3679e-03, PNorm = 162.4953, GNorm = 0.2739, lr_0 = 3.6943e-04
Loss = 7.5209e-03, PNorm = 162.5063, GNorm = 0.1875, lr_0 = 3.6917e-04
Loss = 7.3854e-03, PNorm = 162.5188, GNorm = 0.2218, lr_0 = 3.6892e-04
Loss = 7.0208e-03, PNorm = 162.5303, GNorm = 0.1270, lr_0 = 3.6867e-04
Loss = 6.4633e-03, PNorm = 162.5445, GNorm = 0.1060, lr_0 = 3.6842e-04
Loss = 8.6524e-03, PNorm = 162.5567, GNorm = 0.3223, lr_0 = 3.6816e-04
Loss = 6.4996e-03, PNorm = 162.5732, GNorm = 0.0762, lr_0 = 3.6791e-04
Loss = 6.5103e-03, PNorm = 162.5888, GNorm = 0.2718, lr_0 = 3.6766e-04
Loss = 9.0670e-03, PNorm = 162.6051, GNorm = 0.3121, lr_0 = 3.6741e-04
Loss = 7.3425e-03, PNorm = 162.6174, GNorm = 0.2742, lr_0 = 3.6716e-04
Loss = 6.1188e-03, PNorm = 162.6306, GNorm = 0.3074, lr_0 = 3.6690e-04
Loss = 6.1828e-03, PNorm = 162.6442, GNorm = 0.2702, lr_0 = 3.6665e-04
Loss = 7.2448e-03, PNorm = 162.6592, GNorm = 0.4042, lr_0 = 3.6640e-04
Loss = 7.3332e-03, PNorm = 162.6738, GNorm = 0.1678, lr_0 = 3.6615e-04
Loss = 6.2906e-03, PNorm = 162.6829, GNorm = 0.1422, lr_0 = 3.6590e-04
Loss = 7.8417e-03, PNorm = 162.6962, GNorm = 0.1620, lr_0 = 3.6565e-04
Loss = 6.7808e-03, PNorm = 162.7145, GNorm = 0.4711, lr_0 = 3.6540e-04
Loss = 6.2436e-03, PNorm = 162.7295, GNorm = 0.1544, lr_0 = 3.6515e-04
Loss = 7.3215e-03, PNorm = 162.7426, GNorm = 0.1853, lr_0 = 3.6490e-04
Loss = 5.9346e-03, PNorm = 162.7544, GNorm = 0.2329, lr_0 = 3.6465e-04
Loss = 7.3065e-03, PNorm = 162.7662, GNorm = 0.1342, lr_0 = 3.6440e-04
Loss = 6.3758e-03, PNorm = 162.7805, GNorm = 0.2935, lr_0 = 3.6415e-04
Loss = 7.2722e-03, PNorm = 162.7918, GNorm = 0.2042, lr_0 = 3.6390e-04
Loss = 7.0612e-03, PNorm = 162.8064, GNorm = 0.1124, lr_0 = 3.6365e-04
Loss = 7.8812e-03, PNorm = 162.8217, GNorm = 0.3887, lr_0 = 3.6340e-04
Loss = 6.3954e-03, PNorm = 162.8338, GNorm = 0.1299, lr_0 = 3.6315e-04
Loss = 6.8757e-03, PNorm = 162.8486, GNorm = 0.2727, lr_0 = 3.6290e-04
Loss = 6.8499e-03, PNorm = 162.8622, GNorm = 0.3679, lr_0 = 3.6266e-04
Loss = 9.8270e-03, PNorm = 162.8741, GNorm = 0.1407, lr_0 = 3.6241e-04
Loss = 6.0445e-03, PNorm = 162.8865, GNorm = 0.2441, lr_0 = 3.6216e-04
Loss = 6.4218e-03, PNorm = 162.9006, GNorm = 0.1859, lr_0 = 3.6191e-04
Loss = 7.4777e-03, PNorm = 162.9121, GNorm = 0.2943, lr_0 = 3.6166e-04
Loss = 7.4909e-03, PNorm = 162.9253, GNorm = 0.1547, lr_0 = 3.6141e-04
Loss = 8.4703e-03, PNorm = 162.9407, GNorm = 0.3071, lr_0 = 3.6117e-04
Loss = 5.7437e-03, PNorm = 162.9539, GNorm = 0.0921, lr_0 = 3.6092e-04
Loss = 6.3457e-03, PNorm = 162.9707, GNorm = 0.0958, lr_0 = 3.6067e-04
Loss = 7.6744e-03, PNorm = 162.9852, GNorm = 0.0993, lr_0 = 3.6043e-04
Loss = 7.1878e-03, PNorm = 163.0014, GNorm = 0.4592, lr_0 = 3.6018e-04
Loss = 8.4051e-03, PNorm = 163.0164, GNorm = 0.2990, lr_0 = 3.5993e-04
Loss = 5.7072e-03, PNorm = 163.0325, GNorm = 0.2467, lr_0 = 3.5969e-04
Loss = 5.9936e-03, PNorm = 163.0488, GNorm = 0.1815, lr_0 = 3.5944e-04
Loss = 7.8958e-03, PNorm = 163.0586, GNorm = 0.3150, lr_0 = 3.5919e-04
Loss = 7.4244e-03, PNorm = 163.0731, GNorm = 0.1757, lr_0 = 3.5895e-04
Loss = 8.3460e-03, PNorm = 163.0888, GNorm = 0.4219, lr_0 = 3.5870e-04
Loss = 6.5253e-03, PNorm = 163.1039, GNorm = 0.6715, lr_0 = 3.5845e-04
Loss = 6.5915e-03, PNorm = 163.1190, GNorm = 0.3511, lr_0 = 3.5821e-04
Loss = 9.8694e-03, PNorm = 163.1366, GNorm = 0.3200, lr_0 = 3.5796e-04
Loss = 8.3367e-03, PNorm = 163.1540, GNorm = 0.4977, lr_0 = 3.5772e-04
Loss = 7.5507e-03, PNorm = 163.1700, GNorm = 0.2252, lr_0 = 3.5747e-04
Loss = 6.7357e-03, PNorm = 163.1842, GNorm = 0.1600, lr_0 = 3.5723e-04
Loss = 6.0714e-03, PNorm = 163.1999, GNorm = 0.1724, lr_0 = 3.5698e-04
Loss = 6.4389e-03, PNorm = 163.2133, GNorm = 0.1043, lr_0 = 3.5674e-04
Loss = 5.7948e-03, PNorm = 163.2273, GNorm = 0.1462, lr_0 = 3.5650e-04
Loss = 1.0280e-02, PNorm = 163.2398, GNorm = 0.4140, lr_0 = 3.5625e-04
Loss = 9.7652e-03, PNorm = 163.2568, GNorm = 0.2889, lr_0 = 3.5601e-04
Loss = 6.4044e-03, PNorm = 163.2719, GNorm = 0.1164, lr_0 = 3.5576e-04
Loss = 7.0677e-03, PNorm = 163.2884, GNorm = 0.2059, lr_0 = 3.5552e-04
Loss = 7.3779e-03, PNorm = 163.3035, GNorm = 0.3196, lr_0 = 3.5528e-04
Loss = 6.9229e-03, PNorm = 163.3166, GNorm = 0.3987, lr_0 = 3.5503e-04
Loss = 7.8869e-03, PNorm = 163.3319, GNorm = 0.1744, lr_0 = 3.5479e-04
Loss = 7.7816e-03, PNorm = 163.3489, GNorm = 0.1792, lr_0 = 3.5455e-04
Loss = 9.4558e-03, PNorm = 163.3649, GNorm = 0.1916, lr_0 = 3.5430e-04
Loss = 8.1586e-03, PNorm = 163.3807, GNorm = 0.1120, lr_0 = 3.5406e-04
Loss = 8.3340e-03, PNorm = 163.3940, GNorm = 0.5181, lr_0 = 3.5382e-04
Loss = 5.7343e-03, PNorm = 163.4046, GNorm = 0.2345, lr_0 = 3.5358e-04
Loss = 8.2297e-03, PNorm = 163.4184, GNorm = 0.3415, lr_0 = 3.5333e-04
Loss = 7.3086e-03, PNorm = 163.4334, GNorm = 0.2427, lr_0 = 3.5309e-04
Loss = 6.5176e-03, PNorm = 163.4514, GNorm = 0.1386, lr_0 = 3.5285e-04
Loss = 8.2501e-03, PNorm = 163.4685, GNorm = 0.1838, lr_0 = 3.5261e-04
Loss = 6.6057e-03, PNorm = 163.4866, GNorm = 0.1880, lr_0 = 3.5237e-04
Loss = 6.4881e-03, PNorm = 163.5048, GNorm = 0.1978, lr_0 = 3.5212e-04
Loss = 5.9835e-03, PNorm = 163.5222, GNorm = 0.1438, lr_0 = 3.5188e-04
Loss = 7.6162e-03, PNorm = 163.5413, GNorm = 0.2414, lr_0 = 3.5164e-04
Loss = 6.0437e-03, PNorm = 163.5568, GNorm = 0.3211, lr_0 = 3.5140e-04
Loss = 7.4794e-03, PNorm = 163.5671, GNorm = 0.3394, lr_0 = 3.5116e-04
Loss = 6.4180e-03, PNorm = 163.5760, GNorm = 0.1326, lr_0 = 3.5092e-04
Loss = 7.5740e-03, PNorm = 163.5884, GNorm = 0.6670, lr_0 = 3.5068e-04
Loss = 9.0780e-03, PNorm = 163.6077, GNorm = 0.3463, lr_0 = 3.5044e-04
Loss = 6.2391e-03, PNorm = 163.6248, GNorm = 0.2559, lr_0 = 3.5020e-04
Loss = 7.1485e-03, PNorm = 163.6427, GNorm = 0.7446, lr_0 = 3.4996e-04
Loss = 6.2183e-03, PNorm = 163.6594, GNorm = 0.2639, lr_0 = 3.4972e-04
Loss = 5.9181e-03, PNorm = 163.6716, GNorm = 0.1569, lr_0 = 3.4948e-04
Loss = 9.3575e-03, PNorm = 163.6862, GNorm = 0.1590, lr_0 = 3.4924e-04
Loss = 7.4202e-03, PNorm = 163.7086, GNorm = 0.1509, lr_0 = 3.4900e-04
Loss = 6.1462e-03, PNorm = 163.7248, GNorm = 0.2240, lr_0 = 3.4876e-04
Loss = 5.8168e-03, PNorm = 163.7383, GNorm = 0.3430, lr_0 = 3.4852e-04
Loss = 6.9124e-03, PNorm = 163.7532, GNorm = 0.2236, lr_0 = 3.4828e-04
Loss = 6.7395e-03, PNorm = 163.7683, GNorm = 0.3128, lr_0 = 3.4805e-04
Loss = 5.5578e-03, PNorm = 163.7848, GNorm = 0.1577, lr_0 = 3.4781e-04
Loss = 6.3871e-03, PNorm = 163.7987, GNorm = 0.2682, lr_0 = 3.4757e-04
Loss = 6.5684e-03, PNorm = 163.8120, GNorm = 0.0812, lr_0 = 3.4733e-04
Loss = 1.0041e-02, PNorm = 163.8284, GNorm = 0.4039, lr_0 = 3.4709e-04
Loss = 6.9027e-03, PNorm = 163.8454, GNorm = 0.1376, lr_0 = 3.4686e-04
Loss = 7.6414e-03, PNorm = 163.8624, GNorm = 0.4074, lr_0 = 3.4662e-04
Loss = 9.4988e-03, PNorm = 163.8781, GNorm = 0.1536, lr_0 = 3.4638e-04
Loss = 6.0611e-03, PNorm = 163.8956, GNorm = 0.1259, lr_0 = 3.4614e-04
Loss = 6.1537e-03, PNorm = 163.9119, GNorm = 0.1005, lr_0 = 3.4591e-04
Loss = 1.0235e-02, PNorm = 163.9304, GNorm = 1.1902, lr_0 = 3.4567e-04
Loss = 6.7379e-03, PNorm = 163.9435, GNorm = 0.2090, lr_0 = 3.4543e-04
Loss = 6.6267e-03, PNorm = 163.9623, GNorm = 0.1745, lr_0 = 3.4520e-04
Loss = 8.4553e-03, PNorm = 163.9763, GNorm = 0.1752, lr_0 = 3.4496e-04
Loss = 7.1244e-03, PNorm = 163.9929, GNorm = 0.2280, lr_0 = 3.4472e-04
Loss = 7.2066e-03, PNorm = 164.0075, GNorm = 0.2177, lr_0 = 3.4449e-04
Loss = 7.0694e-03, PNorm = 164.0234, GNorm = 0.1050, lr_0 = 3.4425e-04
Loss = 7.5573e-03, PNorm = 164.0406, GNorm = 0.2588, lr_0 = 3.4402e-04
Loss = 6.5491e-03, PNorm = 164.0577, GNorm = 0.2756, lr_0 = 3.4378e-04
Loss = 5.8787e-03, PNorm = 164.0748, GNorm = 0.0975, lr_0 = 3.4354e-04
Loss = 6.2605e-03, PNorm = 164.0876, GNorm = 0.2610, lr_0 = 3.4331e-04
Validation mae = 0.279495
Epoch 15
Loss = 6.9535e-03, PNorm = 164.0962, GNorm = 0.1988, lr_0 = 3.4307e-04
Loss = 6.5270e-03, PNorm = 164.1037, GNorm = 0.1315, lr_0 = 3.4284e-04
Loss = 5.6223e-03, PNorm = 164.1122, GNorm = 0.1351, lr_0 = 3.4260e-04
Loss = 5.8473e-03, PNorm = 164.1213, GNorm = 0.2150, lr_0 = 3.4237e-04
Loss = 5.0451e-03, PNorm = 164.1344, GNorm = 0.3884, lr_0 = 3.4213e-04
Loss = 7.1634e-03, PNorm = 164.1490, GNorm = 0.5023, lr_0 = 3.4190e-04
Loss = 6.1737e-03, PNorm = 164.1636, GNorm = 0.1802, lr_0 = 3.4167e-04
Loss = 7.9358e-03, PNorm = 164.1706, GNorm = 0.4490, lr_0 = 3.4143e-04
Loss = 5.3702e-03, PNorm = 164.1816, GNorm = 0.3184, lr_0 = 3.4120e-04
Loss = 6.5139e-03, PNorm = 164.1922, GNorm = 0.2065, lr_0 = 3.4096e-04
Loss = 6.6934e-03, PNorm = 164.2050, GNorm = 0.5480, lr_0 = 3.4073e-04
Loss = 6.4038e-03, PNorm = 164.2160, GNorm = 0.1709, lr_0 = 3.4050e-04
Loss = 6.3180e-03, PNorm = 164.2305, GNorm = 0.5027, lr_0 = 3.4026e-04
Loss = 6.4400e-03, PNorm = 164.2447, GNorm = 0.2107, lr_0 = 3.4003e-04
Loss = 7.3630e-03, PNorm = 164.2593, GNorm = 0.4054, lr_0 = 3.3980e-04
Loss = 6.0030e-03, PNorm = 164.2722, GNorm = 0.2473, lr_0 = 3.3956e-04
Loss = 6.2955e-03, PNorm = 164.2849, GNorm = 0.2501, lr_0 = 3.3933e-04
Loss = 5.3660e-03, PNorm = 164.2959, GNorm = 0.2481, lr_0 = 3.3910e-04
Loss = 5.9550e-03, PNorm = 164.3033, GNorm = 0.2461, lr_0 = 3.3887e-04
Loss = 5.9993e-03, PNorm = 164.3106, GNorm = 0.1289, lr_0 = 3.3864e-04
Loss = 7.2602e-03, PNorm = 164.3196, GNorm = 0.2669, lr_0 = 3.3840e-04
Loss = 5.7953e-03, PNorm = 164.3331, GNorm = 0.1301, lr_0 = 3.3817e-04
Loss = 4.6907e-03, PNorm = 164.3443, GNorm = 0.2888, lr_0 = 3.3794e-04
Loss = 5.4030e-03, PNorm = 164.3562, GNorm = 0.3152, lr_0 = 3.3771e-04
Loss = 5.9743e-03, PNorm = 164.3660, GNorm = 0.2872, lr_0 = 3.3748e-04
Loss = 5.4256e-03, PNorm = 164.3779, GNorm = 0.1458, lr_0 = 3.3725e-04
Loss = 5.8604e-03, PNorm = 164.3916, GNorm = 0.2005, lr_0 = 3.3701e-04
Loss = 6.0797e-03, PNorm = 164.4056, GNorm = 0.3000, lr_0 = 3.3678e-04
Loss = 6.0683e-03, PNorm = 164.4221, GNorm = 0.1065, lr_0 = 3.3655e-04
Loss = 5.9183e-03, PNorm = 164.4360, GNorm = 0.2893, lr_0 = 3.3632e-04
Loss = 6.1331e-03, PNorm = 164.4476, GNorm = 0.2579, lr_0 = 3.3609e-04
Loss = 6.3558e-03, PNorm = 164.4594, GNorm = 0.1622, lr_0 = 3.3586e-04
Loss = 7.7845e-03, PNorm = 164.4709, GNorm = 0.1126, lr_0 = 3.3563e-04
Loss = 5.6604e-03, PNorm = 164.4821, GNorm = 0.1804, lr_0 = 3.3540e-04
Loss = 6.1505e-03, PNorm = 164.4948, GNorm = 0.1858, lr_0 = 3.3517e-04
Loss = 5.6253e-03, PNorm = 164.5052, GNorm = 0.1650, lr_0 = 3.3494e-04
Loss = 5.2141e-03, PNorm = 164.5167, GNorm = 0.3123, lr_0 = 3.3471e-04
Loss = 6.2954e-03, PNorm = 164.5252, GNorm = 0.1657, lr_0 = 3.3448e-04
Loss = 6.3247e-03, PNorm = 164.5359, GNorm = 0.1542, lr_0 = 3.3425e-04
Loss = 5.6389e-03, PNorm = 164.5485, GNorm = 0.3028, lr_0 = 3.3403e-04
Loss = 6.4274e-03, PNorm = 164.5624, GNorm = 0.1382, lr_0 = 3.3380e-04
Loss = 6.1285e-03, PNorm = 164.5744, GNorm = 0.3187, lr_0 = 3.3357e-04
Loss = 5.6042e-03, PNorm = 164.5854, GNorm = 0.3809, lr_0 = 3.3334e-04
Loss = 6.3130e-03, PNorm = 164.6003, GNorm = 0.2699, lr_0 = 3.3311e-04
Loss = 8.0557e-03, PNorm = 164.6119, GNorm = 0.3132, lr_0 = 3.3288e-04
Loss = 6.2669e-03, PNorm = 164.6220, GNorm = 0.2385, lr_0 = 3.3265e-04
Loss = 6.1044e-03, PNorm = 164.6354, GNorm = 0.5953, lr_0 = 3.3243e-04
Loss = 5.5302e-03, PNorm = 164.6486, GNorm = 0.1752, lr_0 = 3.3220e-04
Loss = 6.4608e-03, PNorm = 164.6616, GNorm = 0.1728, lr_0 = 3.3197e-04
Loss = 5.1287e-03, PNorm = 164.6743, GNorm = 0.1565, lr_0 = 3.3174e-04
Loss = 6.4611e-03, PNorm = 164.6873, GNorm = 0.1419, lr_0 = 3.3152e-04
Loss = 6.5930e-03, PNorm = 164.7007, GNorm = 0.3981, lr_0 = 3.3129e-04
Loss = 6.1749e-03, PNorm = 164.7118, GNorm = 0.1062, lr_0 = 3.3106e-04
Loss = 5.1502e-03, PNorm = 164.7221, GNorm = 0.1543, lr_0 = 3.3084e-04
Loss = 6.9809e-03, PNorm = 164.7335, GNorm = 0.2964, lr_0 = 3.3061e-04
Loss = 5.4415e-03, PNorm = 164.7461, GNorm = 0.3994, lr_0 = 3.3038e-04
Loss = 4.9596e-03, PNorm = 164.7583, GNorm = 0.1624, lr_0 = 3.3016e-04
Loss = 6.4674e-03, PNorm = 164.7701, GNorm = 0.2640, lr_0 = 3.2993e-04
Loss = 4.9490e-03, PNorm = 164.7799, GNorm = 0.1309, lr_0 = 3.2970e-04
Loss = 5.7151e-03, PNorm = 164.7905, GNorm = 0.5194, lr_0 = 3.2948e-04
Loss = 6.1768e-03, PNorm = 164.8065, GNorm = 0.1537, lr_0 = 3.2925e-04
Loss = 5.5062e-03, PNorm = 164.8184, GNorm = 0.2173, lr_0 = 3.2903e-04
Loss = 4.7147e-03, PNorm = 164.8321, GNorm = 0.1677, lr_0 = 3.2880e-04
Loss = 5.6084e-03, PNorm = 164.8448, GNorm = 0.0943, lr_0 = 3.2858e-04
Loss = 8.3485e-03, PNorm = 164.8595, GNorm = 0.4763, lr_0 = 3.2835e-04
Loss = 5.6542e-03, PNorm = 164.8741, GNorm = 0.2161, lr_0 = 3.2813e-04
Loss = 5.2241e-03, PNorm = 164.8915, GNorm = 0.1478, lr_0 = 3.2790e-04
Loss = 6.3594e-03, PNorm = 164.9053, GNorm = 0.2167, lr_0 = 3.2768e-04
Loss = 6.0225e-03, PNorm = 164.9190, GNorm = 0.1606, lr_0 = 3.2745e-04
Loss = 1.0404e-02, PNorm = 164.9318, GNorm = 0.4172, lr_0 = 3.2723e-04
Loss = 5.7647e-03, PNorm = 164.9427, GNorm = 0.2399, lr_0 = 3.2700e-04
Loss = 5.0777e-03, PNorm = 164.9536, GNorm = 0.1237, lr_0 = 3.2678e-04
Loss = 4.7900e-03, PNorm = 164.9644, GNorm = 0.2045, lr_0 = 3.2656e-04
Loss = 5.4716e-03, PNorm = 164.9775, GNorm = 0.1865, lr_0 = 3.2633e-04
Loss = 5.5804e-03, PNorm = 164.9909, GNorm = 0.2431, lr_0 = 3.2611e-04
Loss = 5.5724e-03, PNorm = 165.0043, GNorm = 0.5916, lr_0 = 3.2589e-04
Loss = 8.3103e-03, PNorm = 165.0179, GNorm = 0.5608, lr_0 = 3.2566e-04
Loss = 8.1619e-03, PNorm = 165.0318, GNorm = 0.3150, lr_0 = 3.2544e-04
Loss = 9.5060e-03, PNorm = 165.0458, GNorm = 0.3866, lr_0 = 3.2522e-04
Loss = 5.7827e-03, PNorm = 165.0576, GNorm = 0.1508, lr_0 = 3.2499e-04
Loss = 6.5402e-03, PNorm = 165.0700, GNorm = 0.2543, lr_0 = 3.2477e-04
Loss = 5.2257e-03, PNorm = 165.0832, GNorm = 0.1195, lr_0 = 3.2455e-04
Loss = 4.9658e-03, PNorm = 165.0954, GNorm = 0.1978, lr_0 = 3.2433e-04
Loss = 7.1932e-03, PNorm = 165.1068, GNorm = 0.2333, lr_0 = 3.2410e-04
Loss = 5.2581e-03, PNorm = 165.1166, GNorm = 0.2737, lr_0 = 3.2388e-04
Loss = 8.2079e-03, PNorm = 165.1248, GNorm = 0.6072, lr_0 = 3.2366e-04
Loss = 5.9359e-03, PNorm = 165.1348, GNorm = 0.1644, lr_0 = 3.2344e-04
Loss = 5.6026e-03, PNorm = 165.1453, GNorm = 0.2880, lr_0 = 3.2322e-04
Loss = 6.0517e-03, PNorm = 165.1572, GNorm = 0.1666, lr_0 = 3.2300e-04
Loss = 5.9966e-03, PNorm = 165.1706, GNorm = 0.2695, lr_0 = 3.2277e-04
Loss = 6.5378e-03, PNorm = 165.1819, GNorm = 0.2603, lr_0 = 3.2255e-04
Loss = 6.0270e-03, PNorm = 165.1942, GNorm = 0.2349, lr_0 = 3.2233e-04
Loss = 6.9203e-03, PNorm = 165.2097, GNorm = 0.1117, lr_0 = 3.2211e-04
Loss = 6.0021e-03, PNorm = 165.2219, GNorm = 0.2318, lr_0 = 3.2189e-04
Loss = 6.7828e-03, PNorm = 165.2352, GNorm = 0.1605, lr_0 = 3.2167e-04
Loss = 4.9899e-03, PNorm = 165.2487, GNorm = 0.2319, lr_0 = 3.2145e-04
Loss = 4.3930e-03, PNorm = 165.2630, GNorm = 0.2743, lr_0 = 3.2123e-04
Loss = 5.8906e-03, PNorm = 165.2756, GNorm = 0.1341, lr_0 = 3.2101e-04
Loss = 7.3344e-03, PNorm = 165.2895, GNorm = 0.0599, lr_0 = 3.2079e-04
Loss = 5.8751e-03, PNorm = 165.3053, GNorm = 0.2186, lr_0 = 3.2057e-04
Loss = 5.9390e-03, PNorm = 165.3186, GNorm = 0.1640, lr_0 = 3.2035e-04
Loss = 8.2255e-03, PNorm = 165.3345, GNorm = 0.1358, lr_0 = 3.2013e-04
Loss = 4.4448e-03, PNorm = 165.3465, GNorm = 0.1841, lr_0 = 3.1991e-04
Loss = 6.5286e-03, PNorm = 165.3602, GNorm = 0.1653, lr_0 = 3.1969e-04
Loss = 6.4901e-03, PNorm = 165.3790, GNorm = 0.2010, lr_0 = 3.1947e-04
Loss = 6.3003e-03, PNorm = 165.3930, GNorm = 0.2157, lr_0 = 3.1925e-04
Loss = 7.6281e-03, PNorm = 165.4068, GNorm = 0.2382, lr_0 = 3.1904e-04
Loss = 6.9290e-03, PNorm = 165.4169, GNorm = 0.3933, lr_0 = 3.1882e-04
Loss = 6.5541e-03, PNorm = 165.4303, GNorm = 0.2092, lr_0 = 3.1860e-04
Loss = 7.2444e-03, PNorm = 165.4450, GNorm = 0.1123, lr_0 = 3.1838e-04
Loss = 5.9606e-03, PNorm = 165.4612, GNorm = 0.1895, lr_0 = 3.1816e-04
Loss = 5.5773e-03, PNorm = 165.4731, GNorm = 0.3855, lr_0 = 3.1794e-04
Loss = 7.6165e-03, PNorm = 165.4842, GNorm = 0.0679, lr_0 = 3.1773e-04
Loss = 5.3290e-03, PNorm = 165.4985, GNorm = 0.1567, lr_0 = 3.1751e-04
Loss = 5.9646e-03, PNorm = 165.5138, GNorm = 0.5774, lr_0 = 3.1729e-04
Loss = 6.4300e-03, PNorm = 165.5297, GNorm = 0.2923, lr_0 = 3.1707e-04
Loss = 6.0615e-03, PNorm = 165.5451, GNorm = 0.5904, lr_0 = 3.1686e-04
Loss = 6.2227e-03, PNorm = 165.5588, GNorm = 0.4022, lr_0 = 3.1664e-04
Loss = 7.3449e-03, PNorm = 165.5740, GNorm = 0.5175, lr_0 = 3.1642e-04
Loss = 8.4526e-03, PNorm = 165.5855, GNorm = 0.4079, lr_0 = 3.1621e-04
Validation mae = 0.279406
Epoch 16
Loss = 5.2898e-03, PNorm = 165.5987, GNorm = 0.2336, lr_0 = 3.1599e-04
Loss = 6.8096e-03, PNorm = 165.6070, GNorm = 0.1951, lr_0 = 3.1577e-04
Loss = 4.6688e-03, PNorm = 165.6212, GNorm = 0.2372, lr_0 = 3.1556e-04
Loss = 4.7410e-03, PNorm = 165.6341, GNorm = 0.2209, lr_0 = 3.1534e-04
Loss = 7.6443e-03, PNorm = 165.6422, GNorm = 0.5157, lr_0 = 3.1512e-04
Loss = 4.6792e-03, PNorm = 165.6465, GNorm = 0.2106, lr_0 = 3.1491e-04
Loss = 4.7843e-03, PNorm = 165.6527, GNorm = 0.4411, lr_0 = 3.1469e-04
Loss = 8.4995e-03, PNorm = 165.6643, GNorm = 0.1896, lr_0 = 3.1448e-04
Loss = 5.5320e-03, PNorm = 165.6754, GNorm = 0.4381, lr_0 = 3.1426e-04
Loss = 5.5509e-03, PNorm = 165.6885, GNorm = 0.2903, lr_0 = 3.1405e-04
Loss = 4.5147e-03, PNorm = 165.6990, GNorm = 0.1854, lr_0 = 3.1383e-04
Loss = 5.8382e-03, PNorm = 165.7101, GNorm = 0.1752, lr_0 = 3.1362e-04
Loss = 4.3282e-03, PNorm = 165.7177, GNorm = 0.3778, lr_0 = 3.1340e-04
Loss = 5.0563e-03, PNorm = 165.7265, GNorm = 0.1820, lr_0 = 3.1319e-04
Loss = 4.6307e-03, PNorm = 165.7364, GNorm = 0.1617, lr_0 = 3.1297e-04
Loss = 4.6707e-03, PNorm = 165.7458, GNorm = 0.2624, lr_0 = 3.1276e-04
Loss = 5.8006e-03, PNorm = 165.7561, GNorm = 0.5227, lr_0 = 3.1254e-04
Loss = 4.4948e-03, PNorm = 165.7675, GNorm = 0.1789, lr_0 = 3.1233e-04
Loss = 4.6011e-03, PNorm = 165.7800, GNorm = 0.1992, lr_0 = 3.1212e-04
Loss = 4.1894e-03, PNorm = 165.7908, GNorm = 0.2502, lr_0 = 3.1190e-04
Loss = 4.6192e-03, PNorm = 165.8013, GNorm = 0.1661, lr_0 = 3.1169e-04
Loss = 5.4530e-03, PNorm = 165.8122, GNorm = 0.1044, lr_0 = 3.1147e-04
Loss = 5.4429e-03, PNorm = 165.8226, GNorm = 0.3243, lr_0 = 3.1126e-04
Loss = 6.2719e-03, PNorm = 165.8328, GNorm = 0.0824, lr_0 = 3.1105e-04
Loss = 6.8988e-03, PNorm = 165.8447, GNorm = 0.2326, lr_0 = 3.1083e-04
Loss = 1.1145e-02, PNorm = 165.8563, GNorm = 0.3271, lr_0 = 3.1062e-04
Loss = 5.2124e-03, PNorm = 165.8657, GNorm = 0.1541, lr_0 = 3.1041e-04
Loss = 5.9709e-03, PNorm = 165.8733, GNorm = 0.2608, lr_0 = 3.1020e-04
Loss = 4.9814e-03, PNorm = 165.8833, GNorm = 0.3326, lr_0 = 3.0998e-04
Loss = 5.1284e-03, PNorm = 165.8959, GNorm = 0.1717, lr_0 = 3.0977e-04
Loss = 5.3954e-03, PNorm = 165.9059, GNorm = 0.3496, lr_0 = 3.0956e-04
Loss = 5.7525e-03, PNorm = 165.9195, GNorm = 0.0638, lr_0 = 3.0935e-04
Loss = 5.8618e-03, PNorm = 165.9303, GNorm = 0.1165, lr_0 = 3.0914e-04
Loss = 4.6476e-03, PNorm = 165.9395, GNorm = 0.1289, lr_0 = 3.0892e-04
Loss = 5.1876e-03, PNorm = 165.9483, GNorm = 0.1458, lr_0 = 3.0871e-04
Loss = 4.6437e-03, PNorm = 165.9550, GNorm = 0.1576, lr_0 = 3.0850e-04
Loss = 5.3124e-03, PNorm = 165.9617, GNorm = 0.1252, lr_0 = 3.0829e-04
Loss = 4.8153e-03, PNorm = 165.9716, GNorm = 0.1572, lr_0 = 3.0808e-04
Loss = 6.0898e-03, PNorm = 165.9855, GNorm = 0.2488, lr_0 = 3.0787e-04
Loss = 5.4383e-03, PNorm = 165.9979, GNorm = 0.2930, lr_0 = 3.0766e-04
Loss = 4.5362e-03, PNorm = 166.0103, GNorm = 0.3281, lr_0 = 3.0745e-04
Loss = 5.4551e-03, PNorm = 166.0215, GNorm = 0.5602, lr_0 = 3.0723e-04
Loss = 4.2510e-03, PNorm = 166.0302, GNorm = 0.1051, lr_0 = 3.0702e-04
Loss = 6.5163e-03, PNorm = 166.0420, GNorm = 0.2460, lr_0 = 3.0681e-04
Loss = 5.3551e-03, PNorm = 166.0539, GNorm = 0.1174, lr_0 = 3.0660e-04
Loss = 6.2767e-03, PNorm = 166.0645, GNorm = 0.2941, lr_0 = 3.0639e-04
Loss = 6.2543e-03, PNorm = 166.0741, GNorm = 0.2484, lr_0 = 3.0618e-04
Loss = 5.0607e-03, PNorm = 166.0833, GNorm = 0.3125, lr_0 = 3.0597e-04
Loss = 6.2882e-03, PNorm = 166.0948, GNorm = 0.2685, lr_0 = 3.0576e-04
Loss = 4.6333e-03, PNorm = 166.1044, GNorm = 0.1299, lr_0 = 3.0555e-04
Loss = 4.7214e-03, PNorm = 166.1157, GNorm = 0.1561, lr_0 = 3.0535e-04
Loss = 4.4091e-03, PNorm = 166.1258, GNorm = 0.0582, lr_0 = 3.0514e-04
Loss = 4.4422e-03, PNorm = 166.1332, GNorm = 0.0977, lr_0 = 3.0493e-04
Loss = 5.2575e-03, PNorm = 166.1434, GNorm = 0.1338, lr_0 = 3.0472e-04
Loss = 5.0355e-03, PNorm = 166.1527, GNorm = 0.3916, lr_0 = 3.0451e-04
Loss = 7.2214e-03, PNorm = 166.1606, GNorm = 0.4519, lr_0 = 3.0430e-04
Loss = 6.9199e-03, PNorm = 166.1710, GNorm = 0.2675, lr_0 = 3.0409e-04
Loss = 5.1675e-03, PNorm = 166.1791, GNorm = 0.3735, lr_0 = 3.0388e-04
Loss = 5.7920e-03, PNorm = 166.1923, GNorm = 0.1265, lr_0 = 3.0368e-04
Loss = 4.4352e-03, PNorm = 166.2072, GNorm = 0.2646, lr_0 = 3.0347e-04
Loss = 5.1316e-03, PNorm = 166.2214, GNorm = 0.2510, lr_0 = 3.0326e-04
Loss = 5.3660e-03, PNorm = 166.2331, GNorm = 0.2646, lr_0 = 3.0305e-04
Loss = 4.3014e-03, PNorm = 166.2397, GNorm = 0.0717, lr_0 = 3.0284e-04
Loss = 4.0847e-03, PNorm = 166.2474, GNorm = 0.2995, lr_0 = 3.0264e-04
Loss = 4.7727e-03, PNorm = 166.2569, GNorm = 0.4314, lr_0 = 3.0243e-04
Loss = 4.8087e-03, PNorm = 166.2665, GNorm = 0.2519, lr_0 = 3.0222e-04
Loss = 4.9262e-03, PNorm = 166.2791, GNorm = 0.1279, lr_0 = 3.0202e-04
Loss = 7.8136e-03, PNorm = 166.2917, GNorm = 0.2337, lr_0 = 3.0181e-04
Loss = 6.3827e-03, PNorm = 166.3048, GNorm = 0.1317, lr_0 = 3.0160e-04
Loss = 4.8145e-03, PNorm = 166.3160, GNorm = 0.2733, lr_0 = 3.0140e-04
Loss = 8.5169e-03, PNorm = 166.3206, GNorm = 0.3960, lr_0 = 3.0119e-04
Loss = 5.3974e-03, PNorm = 166.3295, GNorm = 0.2292, lr_0 = 3.0098e-04
Loss = 6.9116e-03, PNorm = 166.3398, GNorm = 0.3426, lr_0 = 3.0078e-04
Loss = 4.8421e-03, PNorm = 166.3498, GNorm = 0.1764, lr_0 = 3.0057e-04
Loss = 5.2848e-03, PNorm = 166.3609, GNorm = 0.2912, lr_0 = 3.0036e-04
Loss = 5.4878e-03, PNorm = 166.3722, GNorm = 0.3444, lr_0 = 3.0016e-04
Loss = 4.4263e-03, PNorm = 166.3820, GNorm = 0.2095, lr_0 = 2.9995e-04
Loss = 5.1243e-03, PNorm = 166.3900, GNorm = 0.1094, lr_0 = 2.9975e-04
Loss = 4.9634e-03, PNorm = 166.4012, GNorm = 0.2445, lr_0 = 2.9954e-04
Loss = 4.3013e-03, PNorm = 166.4104, GNorm = 0.1021, lr_0 = 2.9934e-04
Loss = 5.4251e-03, PNorm = 166.4209, GNorm = 0.0760, lr_0 = 2.9913e-04
Loss = 6.4592e-03, PNorm = 166.4354, GNorm = 0.1574, lr_0 = 2.9893e-04
Loss = 4.5436e-03, PNorm = 166.4476, GNorm = 0.2522, lr_0 = 2.9872e-04
Loss = 4.5760e-03, PNorm = 166.4583, GNorm = 0.2285, lr_0 = 2.9852e-04
Loss = 4.6508e-03, PNorm = 166.4660, GNorm = 0.0749, lr_0 = 2.9831e-04
Loss = 4.9942e-03, PNorm = 166.4774, GNorm = 0.1259, lr_0 = 2.9811e-04
Loss = 4.9696e-03, PNorm = 166.4869, GNorm = 0.2458, lr_0 = 2.9790e-04
Loss = 7.1329e-03, PNorm = 166.4981, GNorm = 0.1715, lr_0 = 2.9770e-04
Loss = 4.7525e-03, PNorm = 166.5093, GNorm = 0.1894, lr_0 = 2.9750e-04
Loss = 5.0216e-03, PNorm = 166.5211, GNorm = 0.2929, lr_0 = 2.9729e-04
Loss = 4.9134e-03, PNorm = 166.5293, GNorm = 0.1213, lr_0 = 2.9709e-04
Loss = 5.9470e-03, PNorm = 166.5392, GNorm = 0.2520, lr_0 = 2.9689e-04
Loss = 5.9395e-03, PNorm = 166.5474, GNorm = 0.2640, lr_0 = 2.9668e-04
Loss = 8.3437e-03, PNorm = 166.5564, GNorm = 0.2889, lr_0 = 2.9648e-04
Loss = 4.5749e-03, PNorm = 166.5664, GNorm = 0.0838, lr_0 = 2.9628e-04
Loss = 5.8397e-03, PNorm = 166.5780, GNorm = 0.3390, lr_0 = 2.9607e-04
Loss = 4.1766e-03, PNorm = 166.5935, GNorm = 0.1070, lr_0 = 2.9587e-04
Loss = 4.8095e-03, PNorm = 166.6077, GNorm = 0.2316, lr_0 = 2.9567e-04
Loss = 4.9066e-03, PNorm = 166.6216, GNorm = 0.3791, lr_0 = 2.9546e-04
Loss = 4.3485e-03, PNorm = 166.6327, GNorm = 0.1560, lr_0 = 2.9526e-04
Loss = 4.7450e-03, PNorm = 166.6422, GNorm = 0.1083, lr_0 = 2.9506e-04
Loss = 5.9816e-03, PNorm = 166.6531, GNorm = 0.1566, lr_0 = 2.9486e-04
Loss = 5.5940e-03, PNorm = 166.6600, GNorm = 0.2308, lr_0 = 2.9466e-04
Loss = 4.7379e-03, PNorm = 166.6679, GNorm = 0.2196, lr_0 = 2.9445e-04
Loss = 4.7182e-03, PNorm = 166.6755, GNorm = 0.1384, lr_0 = 2.9425e-04
Loss = 6.7700e-03, PNorm = 166.6835, GNorm = 0.1694, lr_0 = 2.9405e-04
Loss = 6.1253e-03, PNorm = 166.6938, GNorm = 0.1600, lr_0 = 2.9385e-04
Loss = 4.7099e-03, PNorm = 166.7061, GNorm = 0.2003, lr_0 = 2.9365e-04
Loss = 6.7175e-03, PNorm = 166.7162, GNorm = 0.4000, lr_0 = 2.9345e-04
Loss = 6.1587e-03, PNorm = 166.7259, GNorm = 0.1890, lr_0 = 2.9325e-04
Loss = 3.9855e-03, PNorm = 166.7372, GNorm = 0.2955, lr_0 = 2.9305e-04
Loss = 6.0839e-03, PNorm = 166.7478, GNorm = 0.2928, lr_0 = 2.9284e-04
Loss = 5.3520e-03, PNorm = 166.7596, GNorm = 0.2223, lr_0 = 2.9264e-04
Loss = 7.7358e-03, PNorm = 166.7720, GNorm = 0.1446, lr_0 = 2.9244e-04
Loss = 5.6847e-03, PNorm = 166.7840, GNorm = 0.2754, lr_0 = 2.9224e-04
Loss = 4.7084e-03, PNorm = 166.7944, GNorm = 0.2065, lr_0 = 2.9204e-04
Loss = 4.8171e-03, PNorm = 166.8057, GNorm = 0.0739, lr_0 = 2.9184e-04
Loss = 4.6875e-03, PNorm = 166.8155, GNorm = 0.1116, lr_0 = 2.9164e-04
Loss = 5.8002e-03, PNorm = 166.8260, GNorm = 0.0948, lr_0 = 2.9144e-04
Loss = 4.7913e-03, PNorm = 166.8384, GNorm = 0.2914, lr_0 = 2.9124e-04
Validation mae = 0.279182
Epoch 17
Loss = 4.0208e-03, PNorm = 166.8487, GNorm = 0.2206, lr_0 = 2.9104e-04
Loss = 4.9456e-03, PNorm = 166.8571, GNorm = 0.2169, lr_0 = 2.9084e-04
Loss = 4.9212e-03, PNorm = 166.8640, GNorm = 0.6860, lr_0 = 2.9065e-04
Loss = 4.2361e-03, PNorm = 166.8732, GNorm = 0.2013, lr_0 = 2.9045e-04
Loss = 3.9602e-03, PNorm = 166.8821, GNorm = 0.3118, lr_0 = 2.9025e-04
Loss = 4.3027e-03, PNorm = 166.8897, GNorm = 0.1008, lr_0 = 2.9005e-04
Loss = 4.2492e-03, PNorm = 166.8979, GNorm = 0.1350, lr_0 = 2.8985e-04
Loss = 6.7555e-03, PNorm = 166.9069, GNorm = 0.0858, lr_0 = 2.8965e-04
Loss = 4.5157e-03, PNorm = 166.9147, GNorm = 0.1881, lr_0 = 2.8945e-04
Loss = 5.0032e-03, PNorm = 166.9243, GNorm = 0.1437, lr_0 = 2.8925e-04
Loss = 3.9905e-03, PNorm = 166.9329, GNorm = 0.2223, lr_0 = 2.8906e-04
Loss = 5.3187e-03, PNorm = 166.9379, GNorm = 0.3369, lr_0 = 2.8886e-04
Loss = 5.2226e-03, PNorm = 166.9465, GNorm = 0.1949, lr_0 = 2.8866e-04
Loss = 5.5104e-03, PNorm = 166.9549, GNorm = 0.0774, lr_0 = 2.8846e-04
Loss = 4.3276e-03, PNorm = 166.9645, GNorm = 0.1601, lr_0 = 2.8826e-04
Loss = 4.6625e-03, PNorm = 166.9745, GNorm = 0.4361, lr_0 = 2.8807e-04
Loss = 4.4581e-03, PNorm = 166.9812, GNorm = 0.1014, lr_0 = 2.8787e-04
Loss = 4.7869e-03, PNorm = 166.9862, GNorm = 0.3800, lr_0 = 2.8767e-04
Loss = 3.4163e-03, PNorm = 166.9942, GNorm = 0.1294, lr_0 = 2.8748e-04
Loss = 4.7161e-03, PNorm = 167.0056, GNorm = 0.3082, lr_0 = 2.8728e-04
Loss = 5.9909e-03, PNorm = 167.0163, GNorm = 0.2018, lr_0 = 2.8708e-04
Loss = 3.4723e-03, PNorm = 167.0271, GNorm = 0.1727, lr_0 = 2.8689e-04
Loss = 4.0171e-03, PNorm = 167.0339, GNorm = 0.1213, lr_0 = 2.8669e-04
Loss = 3.8049e-03, PNorm = 167.0415, GNorm = 0.1416, lr_0 = 2.8649e-04
Loss = 4.4754e-03, PNorm = 167.0477, GNorm = 0.1204, lr_0 = 2.8630e-04
Loss = 5.0413e-03, PNorm = 167.0570, GNorm = 0.3354, lr_0 = 2.8610e-04
Loss = 4.1893e-03, PNorm = 167.0647, GNorm = 0.1121, lr_0 = 2.8590e-04
Loss = 4.3445e-03, PNorm = 167.0731, GNorm = 0.1760, lr_0 = 2.8571e-04
Loss = 4.1951e-03, PNorm = 167.0793, GNorm = 0.2023, lr_0 = 2.8551e-04
Loss = 7.6092e-03, PNorm = 167.0880, GNorm = 0.1395, lr_0 = 2.8532e-04
Loss = 5.1835e-03, PNorm = 167.0972, GNorm = 0.1445, lr_0 = 2.8512e-04
Loss = 4.4535e-03, PNorm = 167.1068, GNorm = 0.1374, lr_0 = 2.8493e-04
Loss = 3.9932e-03, PNorm = 167.1161, GNorm = 0.0883, lr_0 = 2.8473e-04
Loss = 4.3279e-03, PNorm = 167.1265, GNorm = 0.0819, lr_0 = 2.8454e-04
Loss = 3.6587e-03, PNorm = 167.1355, GNorm = 0.1660, lr_0 = 2.8434e-04
Loss = 5.3147e-03, PNorm = 167.1417, GNorm = 0.1457, lr_0 = 2.8415e-04
Loss = 3.9882e-03, PNorm = 167.1484, GNorm = 0.1372, lr_0 = 2.8395e-04
Loss = 4.4035e-03, PNorm = 167.1579, GNorm = 0.1192, lr_0 = 2.8376e-04
Loss = 5.3501e-03, PNorm = 167.1660, GNorm = 0.3609, lr_0 = 2.8356e-04
Loss = 4.7165e-03, PNorm = 167.1725, GNorm = 0.1356, lr_0 = 2.8337e-04
Loss = 3.8674e-03, PNorm = 167.1797, GNorm = 0.2671, lr_0 = 2.8317e-04
Loss = 3.8488e-03, PNorm = 167.1863, GNorm = 0.1353, lr_0 = 2.8298e-04
Loss = 4.3943e-03, PNorm = 167.1912, GNorm = 0.0795, lr_0 = 2.8279e-04
Loss = 5.4509e-03, PNorm = 167.2009, GNorm = 0.1943, lr_0 = 2.8259e-04
Loss = 4.4415e-03, PNorm = 167.2106, GNorm = 0.2301, lr_0 = 2.8240e-04
Loss = 4.8273e-03, PNorm = 167.2204, GNorm = 0.1449, lr_0 = 2.8221e-04
Loss = 4.6709e-03, PNorm = 167.2292, GNorm = 0.0927, lr_0 = 2.8201e-04
Loss = 4.8488e-03, PNorm = 167.2374, GNorm = 0.2012, lr_0 = 2.8182e-04
Loss = 4.3170e-03, PNorm = 167.2465, GNorm = 0.1796, lr_0 = 2.8163e-04
Loss = 4.1734e-03, PNorm = 167.2551, GNorm = 0.2038, lr_0 = 2.8143e-04
Loss = 3.6764e-03, PNorm = 167.2630, GNorm = 0.0787, lr_0 = 2.8124e-04
Loss = 4.4198e-03, PNorm = 167.2732, GNorm = 0.2613, lr_0 = 2.8105e-04
Loss = 3.5577e-03, PNorm = 167.2828, GNorm = 0.1494, lr_0 = 2.8085e-04
Loss = 5.0408e-03, PNorm = 167.2912, GNorm = 0.2202, lr_0 = 2.8066e-04
Loss = 3.8617e-03, PNorm = 167.2976, GNorm = 0.1825, lr_0 = 2.8047e-04
Loss = 4.4684e-03, PNorm = 167.3052, GNorm = 0.0926, lr_0 = 2.8028e-04
Loss = 5.8958e-03, PNorm = 167.3135, GNorm = 0.3538, lr_0 = 2.8009e-04
Loss = 4.4964e-03, PNorm = 167.3230, GNorm = 0.1446, lr_0 = 2.7989e-04
Loss = 4.1323e-03, PNorm = 167.3319, GNorm = 0.1531, lr_0 = 2.7970e-04
Loss = 5.5441e-03, PNorm = 167.3398, GNorm = 0.2275, lr_0 = 2.7951e-04
Loss = 4.7410e-03, PNorm = 167.3507, GNorm = 0.0951, lr_0 = 2.7932e-04
Loss = 5.0243e-03, PNorm = 167.3598, GNorm = 0.2687, lr_0 = 2.7913e-04
Loss = 6.0676e-03, PNorm = 167.3711, GNorm = 0.2682, lr_0 = 2.7894e-04
Loss = 3.4921e-03, PNorm = 167.3784, GNorm = 0.1640, lr_0 = 2.7875e-04
Loss = 4.4104e-03, PNorm = 167.3862, GNorm = 0.0995, lr_0 = 2.7855e-04
Loss = 4.9821e-03, PNorm = 167.3936, GNorm = 0.2119, lr_0 = 2.7836e-04
Loss = 4.3080e-03, PNorm = 167.4011, GNorm = 0.2272, lr_0 = 2.7817e-04
Loss = 3.8903e-03, PNorm = 167.4099, GNorm = 0.2383, lr_0 = 2.7798e-04
Loss = 6.1594e-03, PNorm = 167.4156, GNorm = 0.1944, lr_0 = 2.7779e-04
Loss = 4.4328e-03, PNorm = 167.4231, GNorm = 0.1854, lr_0 = 2.7760e-04
Loss = 3.8766e-03, PNorm = 167.4318, GNorm = 0.1255, lr_0 = 2.7741e-04
Loss = 4.1815e-03, PNorm = 167.4421, GNorm = 0.1600, lr_0 = 2.7722e-04
Loss = 3.9313e-03, PNorm = 167.4505, GNorm = 0.1393, lr_0 = 2.7703e-04
Loss = 4.7744e-03, PNorm = 167.4606, GNorm = 0.1730, lr_0 = 2.7684e-04
Loss = 3.3807e-03, PNorm = 167.4718, GNorm = 0.2249, lr_0 = 2.7665e-04
Loss = 4.0901e-03, PNorm = 167.4825, GNorm = 0.2657, lr_0 = 2.7646e-04
Loss = 5.3093e-03, PNorm = 167.4931, GNorm = 0.0616, lr_0 = 2.7627e-04
Loss = 3.6364e-03, PNorm = 167.4993, GNorm = 0.2612, lr_0 = 2.7608e-04
Loss = 4.4433e-03, PNorm = 167.5091, GNorm = 0.3335, lr_0 = 2.7590e-04
Loss = 5.3748e-03, PNorm = 167.5174, GNorm = 0.2706, lr_0 = 2.7571e-04
Loss = 5.4661e-03, PNorm = 167.5268, GNorm = 0.1507, lr_0 = 2.7552e-04
Loss = 5.2007e-03, PNorm = 167.5360, GNorm = 0.1556, lr_0 = 2.7533e-04
Loss = 4.9991e-03, PNorm = 167.5436, GNorm = 0.2729, lr_0 = 2.7514e-04
Loss = 3.3285e-03, PNorm = 167.5535, GNorm = 0.1565, lr_0 = 2.7495e-04
Loss = 3.6531e-03, PNorm = 167.5654, GNorm = 0.0974, lr_0 = 2.7476e-04
Loss = 5.4290e-03, PNorm = 167.5773, GNorm = 0.4017, lr_0 = 2.7457e-04
Loss = 3.8784e-03, PNorm = 167.5860, GNorm = 0.2480, lr_0 = 2.7439e-04
Loss = 4.1385e-03, PNorm = 167.5930, GNorm = 0.0976, lr_0 = 2.7420e-04
Loss = 4.6043e-03, PNorm = 167.6041, GNorm = 0.2343, lr_0 = 2.7401e-04
Loss = 4.5100e-03, PNorm = 167.6156, GNorm = 0.1127, lr_0 = 2.7382e-04
Loss = 4.8907e-03, PNorm = 167.6220, GNorm = 0.1401, lr_0 = 2.7364e-04
Loss = 3.6239e-03, PNorm = 167.6311, GNorm = 0.3837, lr_0 = 2.7345e-04
Loss = 5.6014e-03, PNorm = 167.6393, GNorm = 0.2426, lr_0 = 2.7326e-04
Loss = 4.2942e-03, PNorm = 167.6490, GNorm = 0.2901, lr_0 = 2.7307e-04
Loss = 5.4782e-03, PNorm = 167.6581, GNorm = 0.1944, lr_0 = 2.7289e-04
Loss = 5.6955e-03, PNorm = 167.6664, GNorm = 0.6563, lr_0 = 2.7270e-04
Loss = 4.8184e-03, PNorm = 167.6742, GNorm = 0.5140, lr_0 = 2.7251e-04
Loss = 5.3655e-03, PNorm = 167.6813, GNorm = 0.3953, lr_0 = 2.7233e-04
Loss = 6.1379e-03, PNorm = 167.6884, GNorm = 0.6430, lr_0 = 2.7214e-04
Loss = 4.8923e-03, PNorm = 167.6986, GNorm = 0.3430, lr_0 = 2.7195e-04
Loss = 5.4670e-03, PNorm = 167.7095, GNorm = 0.2184, lr_0 = 2.7177e-04
Loss = 3.1662e-03, PNorm = 167.7217, GNorm = 0.1158, lr_0 = 2.7158e-04
Loss = 3.9144e-03, PNorm = 167.7305, GNorm = 0.1060, lr_0 = 2.7139e-04
Loss = 3.7431e-03, PNorm = 167.7396, GNorm = 0.1774, lr_0 = 2.7121e-04
Loss = 5.0223e-03, PNorm = 167.7491, GNorm = 0.2213, lr_0 = 2.7102e-04
Loss = 5.7776e-03, PNorm = 167.7580, GNorm = 0.4471, lr_0 = 2.7084e-04
Loss = 4.6005e-03, PNorm = 167.7657, GNorm = 0.2115, lr_0 = 2.7065e-04
Loss = 3.7228e-03, PNorm = 167.7730, GNorm = 0.2313, lr_0 = 2.7047e-04
Loss = 4.3113e-03, PNorm = 167.7830, GNorm = 0.2266, lr_0 = 2.7028e-04
Loss = 4.8328e-03, PNorm = 167.7940, GNorm = 0.3037, lr_0 = 2.7010e-04
Loss = 4.8018e-03, PNorm = 167.8021, GNorm = 0.1593, lr_0 = 2.6991e-04
Loss = 3.6322e-03, PNorm = 167.8113, GNorm = 0.1587, lr_0 = 2.6973e-04
Loss = 7.2605e-03, PNorm = 167.8193, GNorm = 0.3219, lr_0 = 2.6954e-04
Loss = 5.0054e-03, PNorm = 167.8274, GNorm = 0.2199, lr_0 = 2.6936e-04
Loss = 5.9838e-03, PNorm = 167.8365, GNorm = 0.3490, lr_0 = 2.6917e-04
Loss = 3.9829e-03, PNorm = 167.8448, GNorm = 0.2723, lr_0 = 2.6899e-04
Loss = 3.8744e-03, PNorm = 167.8512, GNorm = 0.3477, lr_0 = 2.6880e-04
Loss = 3.7424e-03, PNorm = 167.8605, GNorm = 0.2888, lr_0 = 2.6862e-04
Loss = 8.8966e-03, PNorm = 167.8672, GNorm = 0.2239, lr_0 = 2.6844e-04
Loss = 1.0375e-02, PNorm = 167.8767, GNorm = 0.1520, lr_0 = 2.6825e-04
Validation mae = 0.278974
Epoch 18
Loss = 4.5637e-03, PNorm = 167.8851, GNorm = 0.2061, lr_0 = 2.6807e-04
Loss = 5.0316e-03, PNorm = 167.8946, GNorm = 0.3331, lr_0 = 2.6788e-04
Loss = 4.2685e-03, PNorm = 167.9054, GNorm = 0.1271, lr_0 = 2.6770e-04
Loss = 3.9223e-03, PNorm = 167.9153, GNorm = 0.1304, lr_0 = 2.6752e-04
Loss = 4.3264e-03, PNorm = 167.9229, GNorm = 0.1485, lr_0 = 2.6733e-04
Loss = 4.1632e-03, PNorm = 167.9298, GNorm = 0.1549, lr_0 = 2.6715e-04
Loss = 5.7007e-03, PNorm = 167.9375, GNorm = 0.2284, lr_0 = 2.6697e-04
Loss = 5.5574e-03, PNorm = 167.9459, GNorm = 0.1979, lr_0 = 2.6678e-04
Loss = 3.9122e-03, PNorm = 167.9517, GNorm = 0.0797, lr_0 = 2.6660e-04
Loss = 3.3835e-03, PNorm = 167.9569, GNorm = 0.0631, lr_0 = 2.6642e-04
Loss = 4.0406e-03, PNorm = 167.9620, GNorm = 0.2781, lr_0 = 2.6624e-04
Loss = 4.4082e-03, PNorm = 167.9690, GNorm = 0.1008, lr_0 = 2.6605e-04
Loss = 3.9871e-03, PNorm = 167.9778, GNorm = 0.1655, lr_0 = 2.6587e-04
Loss = 4.4474e-03, PNorm = 167.9855, GNorm = 0.4402, lr_0 = 2.6569e-04
Loss = 2.9674e-03, PNorm = 167.9963, GNorm = 0.1732, lr_0 = 2.6551e-04
Loss = 4.9458e-03, PNorm = 168.0031, GNorm = 0.2785, lr_0 = 2.6533e-04
Loss = 3.1935e-03, PNorm = 168.0091, GNorm = 0.2520, lr_0 = 2.6514e-04
Loss = 3.0632e-03, PNorm = 168.0158, GNorm = 0.0814, lr_0 = 2.6496e-04
Loss = 4.1573e-03, PNorm = 168.0222, GNorm = 0.0674, lr_0 = 2.6478e-04
Loss = 3.9721e-03, PNorm = 168.0291, GNorm = 0.1143, lr_0 = 2.6460e-04
Loss = 3.6453e-03, PNorm = 168.0388, GNorm = 0.1002, lr_0 = 2.6442e-04
Loss = 3.7865e-03, PNorm = 168.0467, GNorm = 0.0836, lr_0 = 2.6424e-04
Loss = 3.0688e-03, PNorm = 168.0557, GNorm = 0.2339, lr_0 = 2.6406e-04
Loss = 3.6544e-03, PNorm = 168.0607, GNorm = 0.2920, lr_0 = 2.6388e-04
Loss = 2.9899e-03, PNorm = 168.0652, GNorm = 0.1304, lr_0 = 2.6369e-04
Loss = 3.3718e-03, PNorm = 168.0691, GNorm = 0.1081, lr_0 = 2.6351e-04
Loss = 3.8989e-03, PNorm = 168.0757, GNorm = 0.1319, lr_0 = 2.6333e-04
Loss = 4.2514e-03, PNorm = 168.0818, GNorm = 0.1225, lr_0 = 2.6315e-04
Loss = 3.8932e-03, PNorm = 168.0889, GNorm = 0.3327, lr_0 = 2.6297e-04
Loss = 3.8237e-03, PNorm = 168.0982, GNorm = 0.0532, lr_0 = 2.6279e-04
Loss = 4.9897e-03, PNorm = 168.1066, GNorm = 0.1033, lr_0 = 2.6261e-04
Loss = 3.6111e-03, PNorm = 168.1151, GNorm = 0.0847, lr_0 = 2.6243e-04
Loss = 3.2501e-03, PNorm = 168.1216, GNorm = 0.4012, lr_0 = 2.6225e-04
Loss = 2.9126e-03, PNorm = 168.1281, GNorm = 0.1151, lr_0 = 2.6207e-04
Loss = 5.0326e-03, PNorm = 168.1346, GNorm = 0.1024, lr_0 = 2.6189e-04
Loss = 4.0241e-03, PNorm = 168.1380, GNorm = 0.2461, lr_0 = 2.6171e-04
Loss = 3.4456e-03, PNorm = 168.1436, GNorm = 0.1358, lr_0 = 2.6153e-04
Loss = 3.1396e-03, PNorm = 168.1501, GNorm = 0.2806, lr_0 = 2.6136e-04
Loss = 4.6382e-03, PNorm = 168.1588, GNorm = 0.1911, lr_0 = 2.6118e-04
Loss = 3.6889e-03, PNorm = 168.1657, GNorm = 0.1303, lr_0 = 2.6100e-04
Loss = 3.2515e-03, PNorm = 168.1738, GNorm = 0.0764, lr_0 = 2.6082e-04
Loss = 3.5130e-03, PNorm = 168.1812, GNorm = 0.1200, lr_0 = 2.6064e-04
Loss = 3.2462e-03, PNorm = 168.1871, GNorm = 0.2063, lr_0 = 2.6046e-04
Loss = 3.4093e-03, PNorm = 168.1927, GNorm = 0.3380, lr_0 = 2.6028e-04
Loss = 3.2325e-03, PNorm = 168.1997, GNorm = 0.1508, lr_0 = 2.6011e-04
Loss = 3.4791e-03, PNorm = 168.2072, GNorm = 0.2681, lr_0 = 2.5993e-04
Loss = 4.3590e-03, PNorm = 168.2159, GNorm = 0.1072, lr_0 = 2.5975e-04
Loss = 4.3470e-03, PNorm = 168.2258, GNorm = 0.2245, lr_0 = 2.5957e-04
Loss = 4.3603e-03, PNorm = 168.2332, GNorm = 0.2379, lr_0 = 2.5939e-04
Loss = 5.8106e-03, PNorm = 168.2422, GNorm = 0.2683, lr_0 = 2.5922e-04
Loss = 3.2450e-03, PNorm = 168.2472, GNorm = 0.2067, lr_0 = 2.5904e-04
Loss = 3.7401e-03, PNorm = 168.2541, GNorm = 0.2875, lr_0 = 2.5886e-04
Loss = 4.8522e-03, PNorm = 168.2601, GNorm = 0.1042, lr_0 = 2.5868e-04
Loss = 3.9392e-03, PNorm = 168.2658, GNorm = 0.1613, lr_0 = 2.5851e-04
Loss = 5.6227e-03, PNorm = 168.2724, GNorm = 0.2833, lr_0 = 2.5833e-04
Loss = 3.2735e-03, PNorm = 168.2834, GNorm = 0.1011, lr_0 = 2.5815e-04
Loss = 3.3391e-03, PNorm = 168.2933, GNorm = 0.1218, lr_0 = 2.5797e-04
Loss = 3.7929e-03, PNorm = 168.2999, GNorm = 0.0787, lr_0 = 2.5780e-04
Loss = 6.4639e-03, PNorm = 168.3060, GNorm = 0.3172, lr_0 = 2.5762e-04
Loss = 3.3691e-03, PNorm = 168.3150, GNorm = 0.1613, lr_0 = 2.5745e-04
Loss = 3.4161e-03, PNorm = 168.3225, GNorm = 0.2258, lr_0 = 2.5727e-04
Loss = 5.6406e-03, PNorm = 168.3327, GNorm = 0.2741, lr_0 = 2.5709e-04
Loss = 3.1707e-03, PNorm = 168.3438, GNorm = 0.1415, lr_0 = 2.5692e-04
Loss = 3.8529e-03, PNorm = 168.3519, GNorm = 0.1682, lr_0 = 2.5674e-04
Loss = 6.1160e-03, PNorm = 168.3592, GNorm = 0.3214, lr_0 = 2.5656e-04
Loss = 3.8267e-03, PNorm = 168.3636, GNorm = 0.0685, lr_0 = 2.5639e-04
Loss = 6.6873e-03, PNorm = 168.3647, GNorm = 0.3686, lr_0 = 2.5621e-04
Loss = 6.4510e-03, PNorm = 168.3711, GNorm = 0.3687, lr_0 = 2.5604e-04
Loss = 3.4915e-03, PNorm = 168.3774, GNorm = 0.1840, lr_0 = 2.5586e-04
Loss = 4.4037e-03, PNorm = 168.3848, GNorm = 0.1122, lr_0 = 2.5569e-04
Loss = 3.7414e-03, PNorm = 168.3945, GNorm = 0.2083, lr_0 = 2.5551e-04
Loss = 4.9519e-03, PNorm = 168.4024, GNorm = 0.4328, lr_0 = 2.5534e-04
Loss = 8.0500e-03, PNorm = 168.4127, GNorm = 0.1905, lr_0 = 2.5516e-04
Loss = 3.2808e-03, PNorm = 168.4194, GNorm = 0.1195, lr_0 = 2.5499e-04
Loss = 3.2713e-03, PNorm = 168.4295, GNorm = 0.1670, lr_0 = 2.5481e-04
Loss = 5.4122e-03, PNorm = 168.4379, GNorm = 0.2424, lr_0 = 2.5464e-04
Loss = 3.8388e-03, PNorm = 168.4464, GNorm = 0.2252, lr_0 = 2.5446e-04
Loss = 3.4521e-03, PNorm = 168.4548, GNorm = 0.1782, lr_0 = 2.5429e-04
Loss = 4.9363e-03, PNorm = 168.4641, GNorm = 0.1093, lr_0 = 2.5411e-04
Loss = 5.4613e-03, PNorm = 168.4730, GNorm = 0.1953, lr_0 = 2.5394e-04
Loss = 6.2393e-03, PNorm = 168.4801, GNorm = 0.5910, lr_0 = 2.5377e-04
Loss = 3.0800e-03, PNorm = 168.4870, GNorm = 0.1600, lr_0 = 2.5359e-04
Loss = 3.2767e-03, PNorm = 168.4932, GNorm = 0.3140, lr_0 = 2.5342e-04
Loss = 3.3433e-03, PNorm = 168.4988, GNorm = 0.2194, lr_0 = 2.5325e-04
Loss = 3.9048e-03, PNorm = 168.5062, GNorm = 0.1302, lr_0 = 2.5307e-04
Loss = 6.0643e-03, PNorm = 168.5145, GNorm = 0.2482, lr_0 = 2.5290e-04
Loss = 3.2109e-03, PNorm = 168.5236, GNorm = 0.1436, lr_0 = 2.5273e-04
Loss = 3.4709e-03, PNorm = 168.5338, GNorm = 0.3433, lr_0 = 2.5255e-04
Loss = 3.9923e-03, PNorm = 168.5423, GNorm = 0.1526, lr_0 = 2.5238e-04
Loss = 3.5069e-03, PNorm = 168.5517, GNorm = 0.1187, lr_0 = 2.5221e-04
Loss = 4.2950e-03, PNorm = 168.5573, GNorm = 0.2729, lr_0 = 2.5203e-04
Loss = 3.5446e-03, PNorm = 168.5648, GNorm = 0.3019, lr_0 = 2.5186e-04
Loss = 3.7371e-03, PNorm = 168.5711, GNorm = 0.4503, lr_0 = 2.5169e-04
Loss = 3.6593e-03, PNorm = 168.5786, GNorm = 0.1463, lr_0 = 2.5152e-04
Loss = 4.4104e-03, PNorm = 168.5872, GNorm = 0.2093, lr_0 = 2.5134e-04
Loss = 3.6776e-03, PNorm = 168.5985, GNorm = 0.2214, lr_0 = 2.5117e-04
Loss = 3.4453e-03, PNorm = 168.6077, GNorm = 0.0488, lr_0 = 2.5100e-04
Loss = 3.8563e-03, PNorm = 168.6176, GNorm = 0.5199, lr_0 = 2.5083e-04
Loss = 3.9096e-03, PNorm = 168.6251, GNorm = 0.1425, lr_0 = 2.5066e-04
Loss = 3.3274e-03, PNorm = 168.6325, GNorm = 0.1750, lr_0 = 2.5048e-04
Loss = 2.8474e-03, PNorm = 168.6415, GNorm = 0.3324, lr_0 = 2.5031e-04
Loss = 5.2523e-03, PNorm = 168.6499, GNorm = 0.2469, lr_0 = 2.5014e-04
Loss = 4.2135e-03, PNorm = 168.6582, GNorm = 0.2815, lr_0 = 2.4997e-04
Loss = 3.0563e-03, PNorm = 168.6675, GNorm = 0.1423, lr_0 = 2.4980e-04
Loss = 4.1212e-03, PNorm = 168.6756, GNorm = 0.1303, lr_0 = 2.4963e-04
Loss = 4.3793e-03, PNorm = 168.6838, GNorm = 0.0722, lr_0 = 2.4946e-04
Loss = 2.9400e-03, PNorm = 168.6929, GNorm = 0.4156, lr_0 = 2.4929e-04
Loss = 5.0863e-03, PNorm = 168.7001, GNorm = 0.1255, lr_0 = 2.4911e-04
Loss = 3.3060e-03, PNorm = 168.7083, GNorm = 0.1789, lr_0 = 2.4894e-04
Loss = 3.3527e-03, PNorm = 168.7165, GNorm = 0.2315, lr_0 = 2.4877e-04
Loss = 3.0681e-03, PNorm = 168.7239, GNorm = 0.1983, lr_0 = 2.4860e-04
Loss = 3.6153e-03, PNorm = 168.7316, GNorm = 0.2757, lr_0 = 2.4843e-04
Loss = 3.3633e-03, PNorm = 168.7401, GNorm = 0.1552, lr_0 = 2.4826e-04
Loss = 2.8912e-03, PNorm = 168.7478, GNorm = 0.2861, lr_0 = 2.4809e-04
Loss = 3.2752e-03, PNorm = 168.7549, GNorm = 0.1699, lr_0 = 2.4792e-04
Loss = 3.5771e-03, PNorm = 168.7594, GNorm = 0.2362, lr_0 = 2.4775e-04
Loss = 3.6946e-03, PNorm = 168.7664, GNorm = 0.0939, lr_0 = 2.4758e-04
Loss = 4.7494e-03, PNorm = 168.7737, GNorm = 0.0719, lr_0 = 2.4741e-04
Loss = 5.1617e-03, PNorm = 168.7817, GNorm = 0.2386, lr_0 = 2.4724e-04
Loss = 3.1906e-03, PNorm = 168.7887, GNorm = 0.1642, lr_0 = 2.4707e-04
Validation mae = 0.278917
Epoch 19
Loss = 2.9588e-03, PNorm = 168.7943, GNorm = 0.1358, lr_0 = 2.4690e-04
Loss = 2.8663e-03, PNorm = 168.7984, GNorm = 0.1974, lr_0 = 2.4674e-04
Loss = 3.7785e-03, PNorm = 168.8017, GNorm = 0.2772, lr_0 = 2.4657e-04
Loss = 4.2545e-03, PNorm = 168.8056, GNorm = 0.1585, lr_0 = 2.4640e-04
Loss = 3.7970e-03, PNorm = 168.8103, GNorm = 0.3553, lr_0 = 2.4623e-04
Loss = 2.8967e-03, PNorm = 168.8154, GNorm = 0.1270, lr_0 = 2.4606e-04
Loss = 2.7520e-03, PNorm = 168.8203, GNorm = 0.2132, lr_0 = 2.4589e-04
Loss = 3.6334e-03, PNorm = 168.8257, GNorm = 0.2197, lr_0 = 2.4572e-04
Loss = 3.3001e-03, PNorm = 168.8301, GNorm = 0.0683, lr_0 = 2.4556e-04
Loss = 2.3384e-03, PNorm = 168.8350, GNorm = 0.2331, lr_0 = 2.4539e-04
Loss = 2.4421e-03, PNorm = 168.8400, GNorm = 0.1680, lr_0 = 2.4522e-04
Loss = 3.3090e-03, PNorm = 168.8435, GNorm = 0.0498, lr_0 = 2.4505e-04
Loss = 3.1091e-03, PNorm = 168.8481, GNorm = 0.1077, lr_0 = 2.4488e-04
Loss = 3.1386e-03, PNorm = 168.8546, GNorm = 0.1910, lr_0 = 2.4472e-04
Loss = 3.5341e-03, PNorm = 168.8610, GNorm = 0.2278, lr_0 = 2.4455e-04
Loss = 3.2637e-03, PNorm = 168.8686, GNorm = 0.0907, lr_0 = 2.4438e-04
Loss = 3.3829e-03, PNorm = 168.8743, GNorm = 0.1945, lr_0 = 2.4421e-04
Loss = 2.8799e-03, PNorm = 168.8799, GNorm = 0.1082, lr_0 = 2.4405e-04
Loss = 2.4845e-03, PNorm = 168.8861, GNorm = 0.1529, lr_0 = 2.4388e-04
Loss = 3.2443e-03, PNorm = 168.8910, GNorm = 0.2240, lr_0 = 2.4371e-04
Loss = 2.9694e-03, PNorm = 168.8936, GNorm = 0.1970, lr_0 = 2.4354e-04
Loss = 4.3924e-03, PNorm = 168.9005, GNorm = 0.2840, lr_0 = 2.4338e-04
Loss = 2.8631e-03, PNorm = 168.9069, GNorm = 0.0702, lr_0 = 2.4321e-04
Loss = 3.4337e-03, PNorm = 168.9117, GNorm = 0.2346, lr_0 = 2.4304e-04
Loss = 3.3712e-03, PNorm = 168.9185, GNorm = 0.2302, lr_0 = 2.4288e-04
Loss = 3.8945e-03, PNorm = 168.9257, GNorm = 0.0979, lr_0 = 2.4271e-04
Loss = 3.7373e-03, PNorm = 168.9322, GNorm = 0.1627, lr_0 = 2.4254e-04
Loss = 2.8599e-03, PNorm = 168.9347, GNorm = 0.1277, lr_0 = 2.4238e-04
Loss = 5.6376e-03, PNorm = 168.9396, GNorm = 0.0938, lr_0 = 2.4221e-04
Loss = 2.6556e-03, PNorm = 168.9456, GNorm = 0.1397, lr_0 = 2.4205e-04
Loss = 3.1051e-03, PNorm = 168.9545, GNorm = 0.2910, lr_0 = 2.4188e-04
Loss = 4.1404e-03, PNorm = 168.9620, GNorm = 0.1244, lr_0 = 2.4171e-04
Loss = 2.8941e-03, PNorm = 168.9675, GNorm = 0.0818, lr_0 = 2.4155e-04
Loss = 3.6029e-03, PNorm = 168.9717, GNorm = 0.1459, lr_0 = 2.4138e-04
Loss = 2.2727e-03, PNorm = 168.9791, GNorm = 0.1624, lr_0 = 2.4122e-04
Loss = 2.9356e-03, PNorm = 168.9856, GNorm = 0.1266, lr_0 = 2.4105e-04
Loss = 2.7782e-03, PNorm = 168.9917, GNorm = 0.1142, lr_0 = 2.4089e-04
Loss = 4.1437e-03, PNorm = 168.9980, GNorm = 0.1564, lr_0 = 2.4072e-04
Loss = 3.8908e-03, PNorm = 169.0072, GNorm = 0.2711, lr_0 = 2.4056e-04
Loss = 3.3868e-03, PNorm = 169.0135, GNorm = 0.1164, lr_0 = 2.4039e-04
Loss = 4.6306e-03, PNorm = 169.0189, GNorm = 0.2848, lr_0 = 2.4023e-04
Loss = 3.1140e-03, PNorm = 169.0248, GNorm = 0.1313, lr_0 = 2.4006e-04
Loss = 3.6820e-03, PNorm = 169.0291, GNorm = 0.0897, lr_0 = 2.3990e-04
Loss = 4.4614e-03, PNorm = 169.0340, GNorm = 0.0478, lr_0 = 2.3974e-04
Loss = 3.6808e-03, PNorm = 169.0399, GNorm = 0.0627, lr_0 = 2.3957e-04
Loss = 3.8496e-03, PNorm = 169.0472, GNorm = 0.1275, lr_0 = 2.3941e-04
Loss = 5.4938e-03, PNorm = 169.0555, GNorm = 0.1178, lr_0 = 2.3924e-04
Loss = 2.9868e-03, PNorm = 169.0646, GNorm = 0.1645, lr_0 = 2.3908e-04
Loss = 3.1039e-03, PNorm = 169.0714, GNorm = 0.1144, lr_0 = 2.3892e-04
Loss = 4.3567e-03, PNorm = 169.0778, GNorm = 0.1639, lr_0 = 2.3875e-04
Loss = 3.0885e-03, PNorm = 169.0830, GNorm = 0.0782, lr_0 = 2.3859e-04
Loss = 2.5293e-03, PNorm = 169.0873, GNorm = 0.0655, lr_0 = 2.3842e-04
Loss = 3.3373e-03, PNorm = 169.0925, GNorm = 0.0745, lr_0 = 2.3826e-04
Loss = 3.8209e-03, PNorm = 169.0967, GNorm = 0.1548, lr_0 = 2.3810e-04
Loss = 5.0796e-03, PNorm = 169.1025, GNorm = 0.1648, lr_0 = 2.3794e-04
Loss = 3.0636e-03, PNorm = 169.1067, GNorm = 0.1843, lr_0 = 2.3777e-04
Loss = 2.8979e-03, PNorm = 169.1120, GNorm = 0.1364, lr_0 = 2.3761e-04
Loss = 2.9635e-03, PNorm = 169.1163, GNorm = 0.0805, lr_0 = 2.3745e-04
Loss = 6.3904e-03, PNorm = 169.1183, GNorm = 0.3833, lr_0 = 2.3728e-04
Loss = 3.3704e-03, PNorm = 169.1241, GNorm = 0.0473, lr_0 = 2.3712e-04
Loss = 5.6359e-03, PNorm = 169.1303, GNorm = 0.1069, lr_0 = 2.3696e-04
Loss = 3.7008e-03, PNorm = 169.1360, GNorm = 0.1107, lr_0 = 2.3680e-04
Loss = 2.5815e-03, PNorm = 169.1416, GNorm = 0.1619, lr_0 = 2.3663e-04
Loss = 3.8880e-03, PNorm = 169.1480, GNorm = 0.1234, lr_0 = 2.3647e-04
Loss = 3.3604e-03, PNorm = 169.1546, GNorm = 0.1465, lr_0 = 2.3631e-04
Loss = 2.5817e-03, PNorm = 169.1601, GNorm = 0.0706, lr_0 = 2.3615e-04
Loss = 3.7293e-03, PNorm = 169.1657, GNorm = 0.1645, lr_0 = 2.3599e-04
Loss = 3.6504e-03, PNorm = 169.1690, GNorm = 0.2787, lr_0 = 2.3582e-04
Loss = 2.4458e-03, PNorm = 169.1730, GNorm = 0.1638, lr_0 = 2.3566e-04
Loss = 2.5395e-03, PNorm = 169.1797, GNorm = 0.1155, lr_0 = 2.3550e-04
Loss = 3.0982e-03, PNorm = 169.1876, GNorm = 0.2655, lr_0 = 2.3534e-04
Loss = 4.8540e-03, PNorm = 169.1940, GNorm = 0.1618, lr_0 = 2.3518e-04
Loss = 3.2600e-03, PNorm = 169.2017, GNorm = 0.1200, lr_0 = 2.3502e-04
Loss = 3.8807e-03, PNorm = 169.2096, GNorm = 0.1650, lr_0 = 2.3486e-04
Loss = 4.9092e-03, PNorm = 169.2169, GNorm = 0.1069, lr_0 = 2.3470e-04
Loss = 2.9869e-03, PNorm = 169.2232, GNorm = 0.2132, lr_0 = 2.3454e-04
Loss = 2.7546e-03, PNorm = 169.2279, GNorm = 0.1878, lr_0 = 2.3437e-04
Loss = 2.4517e-03, PNorm = 169.2342, GNorm = 0.1124, lr_0 = 2.3421e-04
Loss = 2.8293e-03, PNorm = 169.2422, GNorm = 0.1307, lr_0 = 2.3405e-04
Loss = 2.7221e-03, PNorm = 169.2478, GNorm = 0.0845, lr_0 = 2.3389e-04
Loss = 3.0909e-03, PNorm = 169.2516, GNorm = 0.4794, lr_0 = 2.3373e-04
Loss = 2.5225e-03, PNorm = 169.2563, GNorm = 0.2685, lr_0 = 2.3357e-04
Loss = 3.4547e-03, PNorm = 169.2620, GNorm = 0.1997, lr_0 = 2.3341e-04
Loss = 2.6771e-03, PNorm = 169.2681, GNorm = 0.1684, lr_0 = 2.3325e-04
Loss = 2.7053e-03, PNorm = 169.2746, GNorm = 0.2325, lr_0 = 2.3309e-04
Loss = 3.1042e-03, PNorm = 169.2791, GNorm = 0.1418, lr_0 = 2.3293e-04
Loss = 5.8165e-03, PNorm = 169.2842, GNorm = 0.2290, lr_0 = 2.3277e-04
Loss = 3.1055e-03, PNorm = 169.2891, GNorm = 0.1877, lr_0 = 2.3261e-04
Loss = 2.7728e-03, PNorm = 169.2937, GNorm = 0.2003, lr_0 = 2.3246e-04
Loss = 3.3140e-03, PNorm = 169.2993, GNorm = 0.2554, lr_0 = 2.3230e-04
Loss = 3.6910e-03, PNorm = 169.3070, GNorm = 0.0538, lr_0 = 2.3214e-04
Loss = 3.7027e-03, PNorm = 169.3153, GNorm = 0.3743, lr_0 = 2.3198e-04
Loss = 3.6631e-03, PNorm = 169.3242, GNorm = 0.0998, lr_0 = 2.3182e-04
Loss = 2.8894e-03, PNorm = 169.3305, GNorm = 0.0609, lr_0 = 2.3166e-04
Loss = 2.5859e-03, PNorm = 169.3346, GNorm = 0.1108, lr_0 = 2.3150e-04
Loss = 6.1082e-03, PNorm = 169.3375, GNorm = 0.2428, lr_0 = 2.3134e-04
Loss = 2.9361e-03, PNorm = 169.3420, GNorm = 0.0896, lr_0 = 2.3118e-04
Loss = 3.3899e-03, PNorm = 169.3499, GNorm = 0.1069, lr_0 = 2.3103e-04
Loss = 2.5951e-03, PNorm = 169.3584, GNorm = 0.1171, lr_0 = 2.3087e-04
Loss = 4.2334e-03, PNorm = 169.3655, GNorm = 0.2043, lr_0 = 2.3071e-04
Loss = 4.0010e-03, PNorm = 169.3720, GNorm = 0.1196, lr_0 = 2.3055e-04
Loss = 3.5198e-03, PNorm = 169.3790, GNorm = 0.1187, lr_0 = 2.3039e-04
Loss = 4.4244e-03, PNorm = 169.3856, GNorm = 0.2974, lr_0 = 2.3024e-04
Loss = 2.5359e-03, PNorm = 169.3926, GNorm = 0.1446, lr_0 = 2.3008e-04
Loss = 5.5518e-03, PNorm = 169.3974, GNorm = 0.3540, lr_0 = 2.2992e-04
Loss = 3.0421e-03, PNorm = 169.4059, GNorm = 0.2423, lr_0 = 2.2976e-04
Loss = 2.9263e-03, PNorm = 169.4132, GNorm = 0.0830, lr_0 = 2.2961e-04
Loss = 2.9536e-03, PNorm = 169.4220, GNorm = 0.1424, lr_0 = 2.2945e-04
Loss = 2.5700e-03, PNorm = 169.4291, GNorm = 0.1025, lr_0 = 2.2929e-04
Loss = 2.7897e-03, PNorm = 169.4342, GNorm = 0.2233, lr_0 = 2.2913e-04
Loss = 3.3631e-03, PNorm = 169.4396, GNorm = 0.0865, lr_0 = 2.2898e-04
Loss = 4.2221e-03, PNorm = 169.4452, GNorm = 0.1761, lr_0 = 2.2882e-04
Loss = 3.4573e-03, PNorm = 169.4486, GNorm = 0.3557, lr_0 = 2.2866e-04
Loss = 4.2882e-03, PNorm = 169.4550, GNorm = 0.0599, lr_0 = 2.2851e-04
Loss = 3.4091e-03, PNorm = 169.4583, GNorm = 0.1637, lr_0 = 2.2835e-04
Loss = 3.8871e-03, PNorm = 169.4628, GNorm = 0.1375, lr_0 = 2.2819e-04
Loss = 2.6283e-03, PNorm = 169.4672, GNorm = 0.2510, lr_0 = 2.2804e-04
Loss = 2.7340e-03, PNorm = 169.4735, GNorm = 0.1641, lr_0 = 2.2788e-04
Loss = 3.6493e-03, PNorm = 169.4794, GNorm = 0.0585, lr_0 = 2.2773e-04
Loss = 4.8137e-03, PNorm = 169.4858, GNorm = 0.1035, lr_0 = 2.2757e-04
Validation mae = 0.278336
Epoch 20
Loss = 2.7839e-03, PNorm = 169.4932, GNorm = 0.1452, lr_0 = 2.2741e-04
Loss = 2.4224e-03, PNorm = 169.4999, GNorm = 0.3040, lr_0 = 2.2726e-04
Loss = 2.8591e-03, PNorm = 169.5058, GNorm = 0.1370, lr_0 = 2.2710e-04
Loss = 4.2172e-03, PNorm = 169.5083, GNorm = 0.4415, lr_0 = 2.2695e-04
Loss = 2.9168e-03, PNorm = 169.5132, GNorm = 0.1242, lr_0 = 2.2679e-04
Loss = 3.9174e-03, PNorm = 169.5170, GNorm = 0.2509, lr_0 = 2.2664e-04
Loss = 2.2973e-03, PNorm = 169.5191, GNorm = 0.2705, lr_0 = 2.2648e-04
Loss = 2.6546e-03, PNorm = 169.5237, GNorm = 0.1258, lr_0 = 2.2632e-04
Loss = 4.7112e-03, PNorm = 169.5296, GNorm = 0.1761, lr_0 = 2.2617e-04
Loss = 2.7812e-03, PNorm = 169.5360, GNorm = 0.1021, lr_0 = 2.2601e-04
Loss = 2.3280e-03, PNorm = 169.5420, GNorm = 0.1665, lr_0 = 2.2586e-04
Loss = 3.2984e-03, PNorm = 169.5481, GNorm = 0.1449, lr_0 = 2.2571e-04
Loss = 2.4273e-03, PNorm = 169.5536, GNorm = 0.0756, lr_0 = 2.2555e-04
Loss = 2.1813e-03, PNorm = 169.5573, GNorm = 0.0834, lr_0 = 2.2540e-04
Loss = 2.2636e-03, PNorm = 169.5606, GNorm = 0.3259, lr_0 = 2.2524e-04
Loss = 2.5424e-03, PNorm = 169.5663, GNorm = 0.0813, lr_0 = 2.2509e-04
Loss = 3.3190e-03, PNorm = 169.5681, GNorm = 0.1877, lr_0 = 2.2493e-04
Loss = 3.3865e-03, PNorm = 169.5712, GNorm = 0.2343, lr_0 = 2.2478e-04
Loss = 2.9844e-03, PNorm = 169.5757, GNorm = 0.1563, lr_0 = 2.2463e-04
Loss = 2.5839e-03, PNorm = 169.5804, GNorm = 0.2224, lr_0 = 2.2447e-04
Loss = 2.2279e-03, PNorm = 169.5873, GNorm = 0.1976, lr_0 = 2.2432e-04
Loss = 2.8869e-03, PNorm = 169.5929, GNorm = 0.1356, lr_0 = 2.2416e-04
Loss = 3.2350e-03, PNorm = 169.5962, GNorm = 0.1013, lr_0 = 2.2401e-04
Loss = 3.6956e-03, PNorm = 169.6022, GNorm = 0.2413, lr_0 = 2.2386e-04
Loss = 2.6336e-03, PNorm = 169.6078, GNorm = 0.2406, lr_0 = 2.2370e-04
Loss = 2.9053e-03, PNorm = 169.6131, GNorm = 0.1603, lr_0 = 2.2355e-04
Loss = 2.6159e-03, PNorm = 169.6193, GNorm = 0.1884, lr_0 = 2.2340e-04
Loss = 3.1399e-03, PNorm = 169.6241, GNorm = 0.2122, lr_0 = 2.2324e-04
Loss = 2.8186e-03, PNorm = 169.6290, GNorm = 0.1843, lr_0 = 2.2309e-04
Loss = 3.0720e-03, PNorm = 169.6329, GNorm = 0.2699, lr_0 = 2.2294e-04
Loss = 2.2388e-03, PNorm = 169.6353, GNorm = 0.1234, lr_0 = 2.2279e-04
Loss = 3.1702e-03, PNorm = 169.6420, GNorm = 0.2398, lr_0 = 2.2263e-04
Loss = 2.3784e-03, PNorm = 169.6472, GNorm = 0.0672, lr_0 = 2.2248e-04
Loss = 3.1457e-03, PNorm = 169.6536, GNorm = 0.0533, lr_0 = 2.2233e-04
Loss = 2.6928e-03, PNorm = 169.6579, GNorm = 0.1920, lr_0 = 2.2218e-04
Loss = 2.6474e-03, PNorm = 169.6629, GNorm = 0.1527, lr_0 = 2.2202e-04
Loss = 3.0811e-03, PNorm = 169.6665, GNorm = 0.1243, lr_0 = 2.2187e-04
Loss = 2.2873e-03, PNorm = 169.6695, GNorm = 0.0658, lr_0 = 2.2172e-04
Loss = 3.2999e-03, PNorm = 169.6763, GNorm = 0.1948, lr_0 = 2.2157e-04
Loss = 2.5422e-03, PNorm = 169.6815, GNorm = 0.2602, lr_0 = 2.2142e-04
Loss = 2.4301e-03, PNorm = 169.6896, GNorm = 0.1670, lr_0 = 2.2126e-04
Loss = 3.2944e-03, PNorm = 169.6965, GNorm = 0.4218, lr_0 = 2.2111e-04
Loss = 3.0933e-03, PNorm = 169.7016, GNorm = 0.5713, lr_0 = 2.2096e-04
Loss = 2.1638e-03, PNorm = 169.7073, GNorm = 0.1381, lr_0 = 2.2081e-04
Loss = 6.7736e-03, PNorm = 169.7119, GNorm = 0.2231, lr_0 = 2.2066e-04
Loss = 3.6101e-03, PNorm = 169.7170, GNorm = 0.2056, lr_0 = 2.2051e-04
Loss = 3.4548e-03, PNorm = 169.7193, GNorm = 0.1764, lr_0 = 2.2036e-04
Loss = 2.5130e-03, PNorm = 169.7219, GNorm = 0.2170, lr_0 = 2.2021e-04
Loss = 2.6529e-03, PNorm = 169.7273, GNorm = 0.3098, lr_0 = 2.2005e-04
Loss = 2.8109e-03, PNorm = 169.7320, GNorm = 0.1112, lr_0 = 2.1990e-04
Loss = 4.1835e-03, PNorm = 169.7377, GNorm = 0.1688, lr_0 = 2.1975e-04
Loss = 4.1443e-03, PNorm = 169.7418, GNorm = 0.0913, lr_0 = 2.1960e-04
Loss = 2.0437e-03, PNorm = 169.7473, GNorm = 0.0951, lr_0 = 2.1945e-04
Loss = 3.5603e-03, PNorm = 169.7515, GNorm = 0.2217, lr_0 = 2.1930e-04
Loss = 2.9455e-03, PNorm = 169.7603, GNorm = 0.1674, lr_0 = 2.1915e-04
Loss = 4.4766e-03, PNorm = 169.7687, GNorm = 0.0809, lr_0 = 2.1900e-04
Loss = 2.3402e-03, PNorm = 169.7755, GNorm = 0.1115, lr_0 = 2.1885e-04
Loss = 2.7526e-03, PNorm = 169.7837, GNorm = 0.0798, lr_0 = 2.1870e-04
Loss = 2.4492e-03, PNorm = 169.7914, GNorm = 0.1005, lr_0 = 2.1855e-04
Loss = 3.3043e-03, PNorm = 169.7964, GNorm = 0.1537, lr_0 = 2.1840e-04
Loss = 3.6640e-03, PNorm = 169.8037, GNorm = 0.0684, lr_0 = 2.1825e-04
Loss = 5.3338e-03, PNorm = 169.8107, GNorm = 0.1768, lr_0 = 2.1810e-04
Loss = 2.7845e-03, PNorm = 169.8160, GNorm = 0.2130, lr_0 = 2.1795e-04
Loss = 2.8997e-03, PNorm = 169.8191, GNorm = 0.0728, lr_0 = 2.1780e-04
Loss = 4.1945e-03, PNorm = 169.8229, GNorm = 0.1128, lr_0 = 2.1765e-04
Loss = 2.3690e-03, PNorm = 169.8271, GNorm = 0.1483, lr_0 = 2.1751e-04
Loss = 2.3649e-03, PNorm = 169.8341, GNorm = 0.1543, lr_0 = 2.1736e-04
Loss = 3.3226e-03, PNorm = 169.8399, GNorm = 0.0835, lr_0 = 2.1721e-04
Loss = 3.0956e-03, PNorm = 169.8444, GNorm = 0.1780, lr_0 = 2.1706e-04
Loss = 2.9122e-03, PNorm = 169.8498, GNorm = 0.2348, lr_0 = 2.1691e-04
Loss = 3.4834e-03, PNorm = 169.8548, GNorm = 0.2092, lr_0 = 2.1676e-04
Loss = 4.5820e-03, PNorm = 169.8614, GNorm = 0.2653, lr_0 = 2.1661e-04
Loss = 2.8779e-03, PNorm = 169.8674, GNorm = 0.1082, lr_0 = 2.1646e-04
Loss = 3.1719e-03, PNorm = 169.8730, GNorm = 0.0511, lr_0 = 2.1632e-04
Loss = 2.8941e-03, PNorm = 169.8783, GNorm = 0.0876, lr_0 = 2.1617e-04
Loss = 2.5299e-03, PNorm = 169.8829, GNorm = 0.1107, lr_0 = 2.1602e-04
Loss = 2.1050e-03, PNorm = 169.8871, GNorm = 0.1347, lr_0 = 2.1587e-04
Loss = 2.6084e-03, PNorm = 169.8917, GNorm = 0.1192, lr_0 = 2.1572e-04
Loss = 2.9615e-03, PNorm = 169.8960, GNorm = 0.1323, lr_0 = 2.1558e-04
Loss = 2.8243e-03, PNorm = 169.9024, GNorm = 0.2102, lr_0 = 2.1543e-04
Loss = 3.6124e-03, PNorm = 169.9087, GNorm = 0.2864, lr_0 = 2.1528e-04
Loss = 2.7480e-03, PNorm = 169.9147, GNorm = 0.3446, lr_0 = 2.1513e-04
Loss = 3.3459e-03, PNorm = 169.9206, GNorm = 0.8732, lr_0 = 2.1499e-04
Loss = 2.8429e-03, PNorm = 169.9248, GNorm = 0.1153, lr_0 = 2.1484e-04
Loss = 2.5664e-03, PNorm = 169.9295, GNorm = 0.0891, lr_0 = 2.1469e-04
Loss = 2.5027e-03, PNorm = 169.9356, GNorm = 0.0864, lr_0 = 2.1454e-04
Loss = 1.9705e-03, PNorm = 169.9423, GNorm = 0.2502, lr_0 = 2.1440e-04
Loss = 2.5310e-03, PNorm = 169.9488, GNorm = 0.1344, lr_0 = 2.1425e-04
Loss = 5.8688e-03, PNorm = 169.9559, GNorm = 0.2696, lr_0 = 2.1410e-04
Loss = 2.2644e-03, PNorm = 169.9626, GNorm = 0.1696, lr_0 = 2.1396e-04
Loss = 4.8067e-03, PNorm = 169.9694, GNorm = 0.0826, lr_0 = 2.1381e-04
Loss = 3.7326e-03, PNorm = 169.9771, GNorm = 0.0887, lr_0 = 2.1366e-04
Loss = 2.6079e-03, PNorm = 169.9819, GNorm = 0.2157, lr_0 = 2.1352e-04
Loss = 2.8466e-03, PNorm = 169.9884, GNorm = 0.4415, lr_0 = 2.1337e-04
Loss = 2.6046e-03, PNorm = 169.9933, GNorm = 0.2364, lr_0 = 2.1323e-04
Loss = 2.1138e-03, PNorm = 170.0004, GNorm = 0.0587, lr_0 = 2.1308e-04
Loss = 3.1964e-03, PNorm = 170.0044, GNorm = 0.0438, lr_0 = 2.1293e-04
Loss = 3.2153e-03, PNorm = 170.0075, GNorm = 0.2154, lr_0 = 2.1279e-04
Loss = 2.3084e-03, PNorm = 170.0108, GNorm = 0.1385, lr_0 = 2.1264e-04
Loss = 4.7989e-03, PNorm = 170.0151, GNorm = 0.1883, lr_0 = 2.1250e-04
Loss = 2.8207e-03, PNorm = 170.0219, GNorm = 0.2515, lr_0 = 2.1235e-04
Loss = 4.1000e-03, PNorm = 170.0265, GNorm = 0.5243, lr_0 = 2.1221e-04
Loss = 2.9735e-03, PNorm = 170.0298, GNorm = 0.3507, lr_0 = 2.1206e-04
Loss = 3.4234e-03, PNorm = 170.0335, GNorm = 0.1001, lr_0 = 2.1191e-04
Loss = 2.8790e-03, PNorm = 170.0399, GNorm = 0.1436, lr_0 = 2.1177e-04
Loss = 4.0299e-03, PNorm = 170.0484, GNorm = 0.2109, lr_0 = 2.1162e-04
Loss = 2.5408e-03, PNorm = 170.0557, GNorm = 0.3475, lr_0 = 2.1148e-04
Loss = 2.6289e-03, PNorm = 170.0618, GNorm = 0.1921, lr_0 = 2.1133e-04
Loss = 5.8983e-03, PNorm = 170.0679, GNorm = 0.7210, lr_0 = 2.1119e-04
Loss = 3.2052e-03, PNorm = 170.0737, GNorm = 0.1689, lr_0 = 2.1104e-04
Loss = 2.0576e-03, PNorm = 170.0790, GNorm = 0.0495, lr_0 = 2.1090e-04
Loss = 2.2493e-03, PNorm = 170.0839, GNorm = 0.1893, lr_0 = 2.1076e-04
Loss = 2.0065e-03, PNorm = 170.0899, GNorm = 0.1284, lr_0 = 2.1061e-04
Loss = 2.8063e-03, PNorm = 170.0946, GNorm = 0.1210, lr_0 = 2.1047e-04
Loss = 2.8245e-03, PNorm = 170.0985, GNorm = 0.1482, lr_0 = 2.1032e-04
Loss = 4.2800e-03, PNorm = 170.1039, GNorm = 0.4136, lr_0 = 2.1018e-04
Loss = 3.4070e-03, PNorm = 170.1108, GNorm = 0.2506, lr_0 = 2.1003e-04
Loss = 3.0291e-03, PNorm = 170.1187, GNorm = 0.1363, lr_0 = 2.0989e-04
Loss = 4.5990e-03, PNorm = 170.1257, GNorm = 0.2865, lr_0 = 2.0975e-04
Loss = 3.8454e-03, PNorm = 170.1315, GNorm = 0.2452, lr_0 = 2.0960e-04
Validation mae = 0.278784
Epoch 21
Loss = 2.0400e-03, PNorm = 170.1380, GNorm = 0.0729, lr_0 = 2.0946e-04
Loss = 2.3783e-03, PNorm = 170.1431, GNorm = 0.0878, lr_0 = 2.0932e-04
Loss = 1.9879e-03, PNorm = 170.1458, GNorm = 0.0996, lr_0 = 2.0917e-04
Loss = 2.0478e-03, PNorm = 170.1474, GNorm = 0.2276, lr_0 = 2.0903e-04
Loss = 3.2490e-03, PNorm = 170.1484, GNorm = 0.0763, lr_0 = 2.0889e-04
Loss = 1.9771e-03, PNorm = 170.1507, GNorm = 0.0629, lr_0 = 2.0874e-04
Loss = 1.8035e-03, PNorm = 170.1544, GNorm = 0.1543, lr_0 = 2.0860e-04
Loss = 2.6474e-03, PNorm = 170.1579, GNorm = 0.1881, lr_0 = 2.0846e-04
Loss = 1.9798e-03, PNorm = 170.1625, GNorm = 0.1339, lr_0 = 2.0831e-04
Loss = 2.5882e-03, PNorm = 170.1683, GNorm = 0.2275, lr_0 = 2.0817e-04
Loss = 2.1505e-03, PNorm = 170.1719, GNorm = 0.2330, lr_0 = 2.0803e-04
Loss = 2.6181e-03, PNorm = 170.1753, GNorm = 0.3501, lr_0 = 2.0789e-04
Loss = 2.3917e-03, PNorm = 170.1797, GNorm = 0.0636, lr_0 = 2.0774e-04
Loss = 2.3927e-03, PNorm = 170.1824, GNorm = 0.0689, lr_0 = 2.0760e-04
Loss = 2.0422e-03, PNorm = 170.1856, GNorm = 0.2066, lr_0 = 2.0746e-04
Loss = 2.5728e-03, PNorm = 170.1874, GNorm = 0.1239, lr_0 = 2.0732e-04
Loss = 3.0830e-03, PNorm = 170.1902, GNorm = 0.1133, lr_0 = 2.0718e-04
Loss = 4.1354e-03, PNorm = 170.1947, GNorm = 0.1396, lr_0 = 2.0703e-04
Loss = 2.0069e-03, PNorm = 170.2012, GNorm = 0.1325, lr_0 = 2.0689e-04
Loss = 2.2458e-03, PNorm = 170.2083, GNorm = 0.0987, lr_0 = 2.0675e-04
Loss = 2.7956e-03, PNorm = 170.2123, GNorm = 0.1211, lr_0 = 2.0661e-04
Loss = 1.9965e-03, PNorm = 170.2156, GNorm = 0.2943, lr_0 = 2.0647e-04
Loss = 3.2270e-03, PNorm = 170.2203, GNorm = 0.0839, lr_0 = 2.0633e-04
Loss = 1.9291e-03, PNorm = 170.2260, GNorm = 0.1009, lr_0 = 2.0618e-04
Loss = 2.6253e-03, PNorm = 170.2312, GNorm = 0.1250, lr_0 = 2.0604e-04
Loss = 1.9063e-03, PNorm = 170.2345, GNorm = 0.0715, lr_0 = 2.0590e-04
Loss = 1.8750e-03, PNorm = 170.2372, GNorm = 0.0643, lr_0 = 2.0576e-04
Loss = 3.4715e-03, PNorm = 170.2396, GNorm = 0.0435, lr_0 = 2.0562e-04
Loss = 2.7999e-03, PNorm = 170.2440, GNorm = 0.0393, lr_0 = 2.0548e-04
Loss = 2.2249e-03, PNorm = 170.2489, GNorm = 0.2338, lr_0 = 2.0534e-04
Loss = 1.9476e-03, PNorm = 170.2531, GNorm = 0.1122, lr_0 = 2.0520e-04
Loss = 2.1006e-03, PNorm = 170.2572, GNorm = 0.0732, lr_0 = 2.0506e-04
Loss = 1.8128e-03, PNorm = 170.2595, GNorm = 0.2049, lr_0 = 2.0492e-04
Loss = 2.1927e-03, PNorm = 170.2629, GNorm = 0.3407, lr_0 = 2.0478e-04
Loss = 2.1527e-03, PNorm = 170.2668, GNorm = 0.2403, lr_0 = 2.0464e-04
Loss = 2.0607e-03, PNorm = 170.2719, GNorm = 0.1807, lr_0 = 2.0450e-04
Loss = 3.8169e-03, PNorm = 170.2774, GNorm = 0.0971, lr_0 = 2.0436e-04
Loss = 2.0305e-03, PNorm = 170.2807, GNorm = 0.2163, lr_0 = 2.0422e-04
Loss = 2.4312e-03, PNorm = 170.2859, GNorm = 0.2026, lr_0 = 2.0408e-04
Loss = 2.6221e-03, PNorm = 170.2883, GNorm = 0.1372, lr_0 = 2.0394e-04
Loss = 2.3218e-03, PNorm = 170.2934, GNorm = 0.2462, lr_0 = 2.0380e-04
Loss = 3.9032e-03, PNorm = 170.2979, GNorm = 0.2778, lr_0 = 2.0366e-04
Loss = 2.5279e-03, PNorm = 170.3018, GNorm = 0.0877, lr_0 = 2.0352e-04
Loss = 3.3315e-03, PNorm = 170.3068, GNorm = 0.0673, lr_0 = 2.0338e-04
Loss = 3.4118e-03, PNorm = 170.3133, GNorm = 0.2617, lr_0 = 2.0324e-04
Loss = 2.1274e-03, PNorm = 170.3193, GNorm = 0.1260, lr_0 = 2.0310e-04
Loss = 2.2637e-03, PNorm = 170.3253, GNorm = 0.0600, lr_0 = 2.0296e-04
Loss = 1.8930e-03, PNorm = 170.3307, GNorm = 0.2673, lr_0 = 2.0282e-04
Loss = 2.0606e-03, PNorm = 170.3345, GNorm = 0.1786, lr_0 = 2.0268e-04
Loss = 2.2162e-03, PNorm = 170.3397, GNorm = 0.1327, lr_0 = 2.0254e-04
Loss = 1.7498e-03, PNorm = 170.3428, GNorm = 0.1160, lr_0 = 2.0240e-04
Loss = 2.3222e-03, PNorm = 170.3456, GNorm = 0.1130, lr_0 = 2.0227e-04
Loss = 2.7447e-03, PNorm = 170.3507, GNorm = 0.1846, lr_0 = 2.0213e-04
Loss = 2.5219e-03, PNorm = 170.3554, GNorm = 0.1370, lr_0 = 2.0199e-04
Loss = 4.5952e-03, PNorm = 170.3583, GNorm = 0.1903, lr_0 = 2.0185e-04
Loss = 2.8208e-03, PNorm = 170.3624, GNorm = 0.1705, lr_0 = 2.0171e-04
Loss = 2.4547e-03, PNorm = 170.3656, GNorm = 0.0804, lr_0 = 2.0157e-04
Loss = 2.4618e-03, PNorm = 170.3704, GNorm = 0.0506, lr_0 = 2.0144e-04
Loss = 3.3521e-03, PNorm = 170.3741, GNorm = 0.0914, lr_0 = 2.0130e-04
Loss = 1.7040e-03, PNorm = 170.3787, GNorm = 0.0524, lr_0 = 2.0116e-04
Loss = 2.6579e-03, PNorm = 170.3823, GNorm = 0.1606, lr_0 = 2.0102e-04
Loss = 2.3726e-03, PNorm = 170.3869, GNorm = 0.0584, lr_0 = 2.0088e-04
Loss = 2.0274e-03, PNorm = 170.3903, GNorm = 0.2767, lr_0 = 2.0075e-04
Loss = 1.8520e-03, PNorm = 170.3965, GNorm = 0.2376, lr_0 = 2.0061e-04
Loss = 2.7318e-03, PNorm = 170.4034, GNorm = 0.1831, lr_0 = 2.0047e-04
Loss = 3.5744e-03, PNorm = 170.4074, GNorm = 0.1024, lr_0 = 2.0033e-04
Loss = 2.7240e-03, PNorm = 170.4122, GNorm = 0.0770, lr_0 = 2.0020e-04
Loss = 2.4234e-03, PNorm = 170.4153, GNorm = 0.1842, lr_0 = 2.0006e-04
Loss = 3.1984e-03, PNorm = 170.4171, GNorm = 0.2245, lr_0 = 1.9992e-04
Loss = 3.4801e-03, PNorm = 170.4215, GNorm = 0.2586, lr_0 = 1.9979e-04
Loss = 2.2750e-03, PNorm = 170.4228, GNorm = 0.2346, lr_0 = 1.9965e-04
Loss = 3.7014e-03, PNorm = 170.4271, GNorm = 0.1193, lr_0 = 1.9951e-04
Loss = 3.7449e-03, PNorm = 170.4326, GNorm = 0.0712, lr_0 = 1.9938e-04
Loss = 4.3062e-03, PNorm = 170.4369, GNorm = 0.1813, lr_0 = 1.9924e-04
Loss = 2.2416e-03, PNorm = 170.4406, GNorm = 0.1392, lr_0 = 1.9910e-04
Loss = 2.1777e-03, PNorm = 170.4419, GNorm = 0.2531, lr_0 = 1.9897e-04
Loss = 2.3736e-03, PNorm = 170.4458, GNorm = 0.0635, lr_0 = 1.9883e-04
Loss = 4.0060e-03, PNorm = 170.4511, GNorm = 0.3548, lr_0 = 1.9869e-04
Loss = 3.2974e-03, PNorm = 170.4586, GNorm = 0.1554, lr_0 = 1.9856e-04
Loss = 2.2084e-03, PNorm = 170.4659, GNorm = 0.2291, lr_0 = 1.9842e-04
Loss = 3.5556e-03, PNorm = 170.4730, GNorm = 0.0749, lr_0 = 1.9829e-04
Loss = 3.3829e-03, PNorm = 170.4786, GNorm = 0.2752, lr_0 = 1.9815e-04
Loss = 2.7784e-03, PNorm = 170.4834, GNorm = 0.1952, lr_0 = 1.9801e-04
Loss = 4.5311e-03, PNorm = 170.4890, GNorm = 0.2250, lr_0 = 1.9788e-04
Loss = 2.6327e-03, PNorm = 170.4970, GNorm = 0.1300, lr_0 = 1.9774e-04
Loss = 2.5172e-03, PNorm = 170.5023, GNorm = 0.1357, lr_0 = 1.9761e-04
Loss = 1.8196e-03, PNorm = 170.5063, GNorm = 0.2283, lr_0 = 1.9747e-04
Loss = 2.4911e-03, PNorm = 170.5081, GNorm = 0.0597, lr_0 = 1.9734e-04
Loss = 2.1038e-03, PNorm = 170.5104, GNorm = 0.2062, lr_0 = 1.9720e-04
Loss = 3.1740e-03, PNorm = 170.5129, GNorm = 0.1113, lr_0 = 1.9707e-04
Loss = 5.3684e-03, PNorm = 170.5162, GNorm = 0.1963, lr_0 = 1.9693e-04
Loss = 2.2092e-03, PNorm = 170.5201, GNorm = 0.0967, lr_0 = 1.9680e-04
Loss = 3.4860e-03, PNorm = 170.5243, GNorm = 0.1179, lr_0 = 1.9666e-04
Loss = 3.0791e-03, PNorm = 170.5289, GNorm = 0.2914, lr_0 = 1.9653e-04
Loss = 2.0717e-03, PNorm = 170.5345, GNorm = 0.1002, lr_0 = 1.9639e-04
Loss = 3.7376e-03, PNorm = 170.5397, GNorm = 0.0618, lr_0 = 1.9626e-04
Loss = 2.3503e-03, PNorm = 170.5433, GNorm = 0.2732, lr_0 = 1.9612e-04
Loss = 2.0150e-03, PNorm = 170.5465, GNorm = 0.0888, lr_0 = 1.9599e-04
Loss = 5.2078e-03, PNorm = 170.5498, GNorm = 0.2195, lr_0 = 1.9585e-04
Loss = 3.1374e-03, PNorm = 170.5534, GNorm = 0.0447, lr_0 = 1.9572e-04
Loss = 2.7203e-03, PNorm = 170.5563, GNorm = 0.1197, lr_0 = 1.9559e-04
Loss = 2.7071e-03, PNorm = 170.5612, GNorm = 0.1870, lr_0 = 1.9545e-04
Loss = 2.8000e-03, PNorm = 170.5685, GNorm = 0.0693, lr_0 = 1.9532e-04
Loss = 1.9721e-03, PNorm = 170.5734, GNorm = 0.0652, lr_0 = 1.9518e-04
Loss = 2.2747e-03, PNorm = 170.5790, GNorm = 0.1238, lr_0 = 1.9505e-04
Loss = 2.1726e-03, PNorm = 170.5838, GNorm = 0.1006, lr_0 = 1.9492e-04
Loss = 2.8311e-03, PNorm = 170.5877, GNorm = 0.4346, lr_0 = 1.9478e-04
Loss = 2.9477e-03, PNorm = 170.5926, GNorm = 0.3964, lr_0 = 1.9465e-04
Loss = 2.4537e-03, PNorm = 170.5964, GNorm = 0.1717, lr_0 = 1.9452e-04
Loss = 6.0826e-03, PNorm = 170.6001, GNorm = 0.1548, lr_0 = 1.9438e-04
Loss = 2.8783e-03, PNorm = 170.6037, GNorm = 0.1596, lr_0 = 1.9425e-04
Loss = 4.5603e-03, PNorm = 170.6086, GNorm = 0.1093, lr_0 = 1.9412e-04
Loss = 4.7992e-03, PNorm = 170.6155, GNorm = 0.0909, lr_0 = 1.9398e-04
Loss = 2.6166e-03, PNorm = 170.6206, GNorm = 0.1842, lr_0 = 1.9385e-04
Loss = 2.8341e-03, PNorm = 170.6252, GNorm = 0.0595, lr_0 = 1.9372e-04
Loss = 2.7586e-03, PNorm = 170.6293, GNorm = 0.1754, lr_0 = 1.9359e-04
Loss = 4.5595e-03, PNorm = 170.6344, GNorm = 0.1142, lr_0 = 1.9345e-04
Loss = 3.0366e-03, PNorm = 170.6383, GNorm = 0.0734, lr_0 = 1.9332e-04
Loss = 2.3816e-03, PNorm = 170.6443, GNorm = 0.1468, lr_0 = 1.9319e-04
Loss = 1.7281e-03, PNorm = 170.6499, GNorm = 0.0936, lr_0 = 1.9306e-04
Validation mae = 0.277908
Epoch 22
Loss = 4.1228e-03, PNorm = 170.6528, GNorm = 0.1785, lr_0 = 1.9292e-04
Loss = 3.1993e-03, PNorm = 170.6545, GNorm = 0.2246, lr_0 = 1.9279e-04
Loss = 2.4039e-03, PNorm = 170.6574, GNorm = 0.0882, lr_0 = 1.9266e-04
Loss = 2.0751e-03, PNorm = 170.6608, GNorm = 0.1089, lr_0 = 1.9253e-04
Loss = 2.0265e-03, PNorm = 170.6628, GNorm = 0.1679, lr_0 = 1.9240e-04
Loss = 2.2233e-03, PNorm = 170.6662, GNorm = 0.0779, lr_0 = 1.9226e-04
Loss = 2.5340e-03, PNorm = 170.6690, GNorm = 0.0727, lr_0 = 1.9213e-04
Loss = 1.6351e-03, PNorm = 170.6714, GNorm = 0.0814, lr_0 = 1.9200e-04
Loss = 3.2337e-03, PNorm = 170.6755, GNorm = 0.0950, lr_0 = 1.9187e-04
Loss = 1.7983e-03, PNorm = 170.6789, GNorm = 0.3257, lr_0 = 1.9174e-04
Loss = 1.8202e-03, PNorm = 170.6822, GNorm = 0.1514, lr_0 = 1.9161e-04
Loss = 2.5560e-03, PNorm = 170.6862, GNorm = 0.1705, lr_0 = 1.9148e-04
Loss = 1.4456e-03, PNorm = 170.6889, GNorm = 0.1445, lr_0 = 1.9134e-04
Loss = 2.0453e-03, PNorm = 170.6910, GNorm = 0.2359, lr_0 = 1.9121e-04
Loss = 1.8008e-03, PNorm = 170.6943, GNorm = 0.1753, lr_0 = 1.9108e-04
Loss = 2.3111e-03, PNorm = 170.6971, GNorm = 0.1833, lr_0 = 1.9095e-04
Loss = 2.2865e-03, PNorm = 170.7012, GNorm = 0.2993, lr_0 = 1.9082e-04
Loss = 2.1448e-03, PNorm = 170.7043, GNorm = 0.1031, lr_0 = 1.9069e-04
Loss = 1.5697e-03, PNorm = 170.7076, GNorm = 0.0844, lr_0 = 1.9056e-04
Loss = 1.6962e-03, PNorm = 170.7091, GNorm = 0.0507, lr_0 = 1.9043e-04
Loss = 2.7255e-03, PNorm = 170.7119, GNorm = 0.1309, lr_0 = 1.9030e-04
Loss = 1.6866e-03, PNorm = 170.7156, GNorm = 0.0382, lr_0 = 1.9017e-04
Loss = 2.5704e-03, PNorm = 170.7183, GNorm = 0.1072, lr_0 = 1.9004e-04
Loss = 2.1195e-03, PNorm = 170.7195, GNorm = 0.1015, lr_0 = 1.8991e-04
Loss = 1.4832e-03, PNorm = 170.7227, GNorm = 0.0926, lr_0 = 1.8978e-04
Loss = 1.8676e-03, PNorm = 170.7253, GNorm = 0.1539, lr_0 = 1.8965e-04
Loss = 1.7123e-03, PNorm = 170.7282, GNorm = 0.2087, lr_0 = 1.8952e-04
Loss = 1.9518e-03, PNorm = 170.7318, GNorm = 0.2161, lr_0 = 1.8939e-04
Loss = 4.5831e-03, PNorm = 170.7361, GNorm = 0.3727, lr_0 = 1.8926e-04
Loss = 3.7765e-03, PNorm = 170.7419, GNorm = 0.2464, lr_0 = 1.8913e-04
Loss = 3.0239e-03, PNorm = 170.7472, GNorm = 0.1502, lr_0 = 1.8900e-04
Loss = 2.4481e-03, PNorm = 170.7525, GNorm = 0.1475, lr_0 = 1.8887e-04
Loss = 4.1912e-03, PNorm = 170.7567, GNorm = 0.1478, lr_0 = 1.8874e-04
Loss = 1.9036e-03, PNorm = 170.7596, GNorm = 0.2140, lr_0 = 1.8861e-04
Loss = 1.9247e-03, PNorm = 170.7635, GNorm = 0.1661, lr_0 = 1.8848e-04
Loss = 2.0564e-03, PNorm = 170.7661, GNorm = 0.3140, lr_0 = 1.8835e-04
Loss = 2.6949e-03, PNorm = 170.7704, GNorm = 0.4164, lr_0 = 1.8822e-04
Loss = 2.9729e-03, PNorm = 170.7747, GNorm = 0.1847, lr_0 = 1.8809e-04
Loss = 2.2303e-03, PNorm = 170.7797, GNorm = 0.1684, lr_0 = 1.8797e-04
Loss = 2.4471e-03, PNorm = 170.7838, GNorm = 0.1519, lr_0 = 1.8784e-04
Loss = 2.4918e-03, PNorm = 170.7874, GNorm = 0.1255, lr_0 = 1.8771e-04
Loss = 2.5932e-03, PNorm = 170.7912, GNorm = 0.0888, lr_0 = 1.8758e-04
Loss = 2.7311e-03, PNorm = 170.7951, GNorm = 0.1267, lr_0 = 1.8745e-04
Loss = 2.5529e-03, PNorm = 170.7981, GNorm = 0.4576, lr_0 = 1.8732e-04
Loss = 2.0970e-03, PNorm = 170.7998, GNorm = 0.0819, lr_0 = 1.8719e-04
Loss = 2.7846e-03, PNorm = 170.8030, GNorm = 0.1617, lr_0 = 1.8707e-04
Loss = 1.4655e-03, PNorm = 170.8077, GNorm = 0.0956, lr_0 = 1.8694e-04
Loss = 2.4910e-03, PNorm = 170.8129, GNorm = 0.1050, lr_0 = 1.8681e-04
Loss = 2.5233e-03, PNorm = 170.8168, GNorm = 0.0562, lr_0 = 1.8668e-04
Loss = 2.1657e-03, PNorm = 170.8208, GNorm = 0.1143, lr_0 = 1.8655e-04
Loss = 2.4219e-03, PNorm = 170.8243, GNorm = 0.0820, lr_0 = 1.8643e-04
Loss = 1.5488e-03, PNorm = 170.8285, GNorm = 0.0855, lr_0 = 1.8630e-04
Loss = 4.9372e-03, PNorm = 170.8338, GNorm = 0.1760, lr_0 = 1.8617e-04
Loss = 2.1462e-03, PNorm = 170.8388, GNorm = 0.0593, lr_0 = 1.8604e-04
Loss = 1.5734e-03, PNorm = 170.8435, GNorm = 0.1252, lr_0 = 1.8592e-04
Loss = 1.4619e-03, PNorm = 170.8483, GNorm = 0.1014, lr_0 = 1.8579e-04
Loss = 2.7650e-03, PNorm = 170.8514, GNorm = 0.2603, lr_0 = 1.8566e-04
Loss = 2.2569e-03, PNorm = 170.8553, GNorm = 0.3000, lr_0 = 1.8553e-04
Loss = 1.4696e-03, PNorm = 170.8607, GNorm = 0.0793, lr_0 = 1.8541e-04
Loss = 1.6463e-03, PNorm = 170.8641, GNorm = 0.1650, lr_0 = 1.8528e-04
Loss = 1.7693e-03, PNorm = 170.8679, GNorm = 0.1146, lr_0 = 1.8515e-04
Loss = 1.5392e-03, PNorm = 170.8704, GNorm = 0.2824, lr_0 = 1.8503e-04
Loss = 2.2047e-03, PNorm = 170.8755, GNorm = 0.2027, lr_0 = 1.8490e-04
Loss = 1.8893e-03, PNorm = 170.8803, GNorm = 0.1765, lr_0 = 1.8477e-04
Loss = 2.0696e-03, PNorm = 170.8840, GNorm = 0.0612, lr_0 = 1.8465e-04
Loss = 2.6701e-03, PNorm = 170.8875, GNorm = 0.0924, lr_0 = 1.8452e-04
Loss = 3.9532e-03, PNorm = 170.8920, GNorm = 0.2442, lr_0 = 1.8439e-04
Loss = 3.3411e-03, PNorm = 170.8940, GNorm = 0.2126, lr_0 = 1.8427e-04
Loss = 1.6621e-03, PNorm = 170.8975, GNorm = 0.0996, lr_0 = 1.8414e-04
Loss = 2.5002e-03, PNorm = 170.9016, GNorm = 0.1134, lr_0 = 1.8401e-04
Loss = 2.7468e-03, PNorm = 170.9060, GNorm = 0.0633, lr_0 = 1.8389e-04
Loss = 1.9165e-03, PNorm = 170.9092, GNorm = 0.1233, lr_0 = 1.8376e-04
Loss = 2.4347e-03, PNorm = 170.9125, GNorm = 0.1970, lr_0 = 1.8364e-04
Loss = 2.6540e-03, PNorm = 170.9171, GNorm = 0.0950, lr_0 = 1.8351e-04
Loss = 2.6122e-03, PNorm = 170.9215, GNorm = 0.1188, lr_0 = 1.8338e-04
Loss = 2.4008e-03, PNorm = 170.9243, GNorm = 0.1497, lr_0 = 1.8326e-04
Loss = 1.9329e-03, PNorm = 170.9275, GNorm = 0.0936, lr_0 = 1.8313e-04
Loss = 2.1586e-03, PNorm = 170.9311, GNorm = 0.1728, lr_0 = 1.8301e-04
Loss = 2.6019e-03, PNorm = 170.9340, GNorm = 0.1499, lr_0 = 1.8288e-04
Loss = 2.0104e-03, PNorm = 170.9385, GNorm = 0.1881, lr_0 = 1.8276e-04
Loss = 1.9408e-03, PNorm = 170.9423, GNorm = 0.1579, lr_0 = 1.8263e-04
Loss = 1.4022e-03, PNorm = 170.9451, GNorm = 0.0627, lr_0 = 1.8251e-04
Loss = 1.9648e-03, PNorm = 170.9468, GNorm = 0.1200, lr_0 = 1.8238e-04
Loss = 1.3859e-03, PNorm = 170.9490, GNorm = 0.1712, lr_0 = 1.8226e-04
Loss = 1.7980e-03, PNorm = 170.9530, GNorm = 0.0609, lr_0 = 1.8213e-04
Loss = 1.8546e-03, PNorm = 170.9583, GNorm = 0.0978, lr_0 = 1.8201e-04
Loss = 2.8095e-03, PNorm = 170.9645, GNorm = 0.2319, lr_0 = 1.8188e-04
Loss = 2.6003e-03, PNorm = 170.9693, GNorm = 0.1201, lr_0 = 1.8176e-04
Loss = 4.3025e-03, PNorm = 170.9724, GNorm = 0.1108, lr_0 = 1.8163e-04
Loss = 1.9162e-03, PNorm = 170.9750, GNorm = 0.1765, lr_0 = 1.8151e-04
Loss = 2.6956e-03, PNorm = 170.9798, GNorm = 0.2208, lr_0 = 1.8138e-04
Loss = 4.7655e-03, PNorm = 170.9852, GNorm = 0.0579, lr_0 = 1.8126e-04
Loss = 1.7170e-03, PNorm = 170.9886, GNorm = 0.0503, lr_0 = 1.8114e-04
Loss = 3.4113e-03, PNorm = 170.9941, GNorm = 0.1090, lr_0 = 1.8101e-04
Loss = 3.2216e-03, PNorm = 170.9974, GNorm = 0.2245, lr_0 = 1.8089e-04
Loss = 2.4225e-03, PNorm = 171.0016, GNorm = 0.1672, lr_0 = 1.8076e-04
Loss = 2.3917e-03, PNorm = 171.0032, GNorm = 0.0590, lr_0 = 1.8064e-04
Loss = 1.7891e-03, PNorm = 171.0056, GNorm = 0.2171, lr_0 = 1.8052e-04
Loss = 1.7052e-03, PNorm = 171.0090, GNorm = 0.0947, lr_0 = 1.8039e-04
Loss = 3.9466e-03, PNorm = 171.0099, GNorm = 0.0530, lr_0 = 1.8027e-04
Loss = 3.1183e-03, PNorm = 171.0124, GNorm = 0.3350, lr_0 = 1.8015e-04
Loss = 1.6594e-03, PNorm = 171.0163, GNorm = 0.2565, lr_0 = 1.8002e-04
Loss = 2.7287e-03, PNorm = 171.0218, GNorm = 0.1881, lr_0 = 1.7990e-04
Loss = 2.9761e-03, PNorm = 171.0268, GNorm = 0.0388, lr_0 = 1.7978e-04
Loss = 2.2671e-03, PNorm = 171.0305, GNorm = 0.0495, lr_0 = 1.7965e-04
Loss = 3.0298e-03, PNorm = 171.0340, GNorm = 0.0821, lr_0 = 1.7953e-04
Loss = 2.3978e-03, PNorm = 171.0367, GNorm = 0.0542, lr_0 = 1.7941e-04
Loss = 1.9860e-03, PNorm = 171.0402, GNorm = 0.1953, lr_0 = 1.7928e-04
Loss = 2.2598e-03, PNorm = 171.0446, GNorm = 0.1173, lr_0 = 1.7916e-04
Loss = 3.1694e-03, PNorm = 171.0481, GNorm = 0.1904, lr_0 = 1.7904e-04
Loss = 1.6363e-03, PNorm = 171.0522, GNorm = 0.0756, lr_0 = 1.7892e-04
Loss = 3.2772e-03, PNorm = 171.0564, GNorm = 0.1116, lr_0 = 1.7879e-04
Loss = 1.8631e-03, PNorm = 171.0616, GNorm = 0.2228, lr_0 = 1.7867e-04
Loss = 2.1028e-03, PNorm = 171.0653, GNorm = 0.0673, lr_0 = 1.7855e-04
Loss = 2.5375e-03, PNorm = 171.0713, GNorm = 0.2094, lr_0 = 1.7843e-04
Loss = 2.0920e-03, PNorm = 171.0759, GNorm = 0.2828, lr_0 = 1.7830e-04
Loss = 2.7438e-03, PNorm = 171.0813, GNorm = 0.0495, lr_0 = 1.7818e-04
Loss = 2.4191e-03, PNorm = 171.0824, GNorm = 0.0643, lr_0 = 1.7806e-04
Loss = 2.2543e-03, PNorm = 171.0861, GNorm = 0.2131, lr_0 = 1.7794e-04
Loss = 5.3869e-03, PNorm = 171.0909, GNorm = 0.0664, lr_0 = 1.7782e-04
Validation mae = 0.278082
Epoch 23
Loss = 1.9896e-03, PNorm = 171.0950, GNorm = 0.1337, lr_0 = 1.7769e-04
Loss = 1.3328e-03, PNorm = 171.0982, GNorm = 0.1086, lr_0 = 1.7757e-04
Loss = 1.9236e-03, PNorm = 171.1004, GNorm = 0.2127, lr_0 = 1.7745e-04
Loss = 1.7108e-03, PNorm = 171.1030, GNorm = 0.1291, lr_0 = 1.7733e-04
Loss = 2.4187e-03, PNorm = 171.1054, GNorm = 0.1111, lr_0 = 1.7721e-04
Loss = 2.6348e-03, PNorm = 171.1085, GNorm = 0.2054, lr_0 = 1.7709e-04
Loss = 1.4469e-03, PNorm = 171.1108, GNorm = 0.0602, lr_0 = 1.7696e-04
Loss = 2.0833e-03, PNorm = 171.1159, GNorm = 0.1091, lr_0 = 1.7684e-04
Loss = 1.6390e-03, PNorm = 171.1183, GNorm = 0.1692, lr_0 = 1.7672e-04
Loss = 2.0991e-03, PNorm = 171.1211, GNorm = 0.1469, lr_0 = 1.7660e-04
Loss = 2.3029e-03, PNorm = 171.1243, GNorm = 0.1195, lr_0 = 1.7648e-04
Loss = 1.6033e-03, PNorm = 171.1276, GNorm = 0.1307, lr_0 = 1.7636e-04
Loss = 1.8142e-03, PNorm = 171.1271, GNorm = 0.0571, lr_0 = 1.7624e-04
Loss = 2.8456e-03, PNorm = 171.1294, GNorm = 0.2290, lr_0 = 1.7612e-04
Loss = 1.4543e-03, PNorm = 171.1321, GNorm = 0.0887, lr_0 = 1.7600e-04
Loss = 1.5226e-03, PNorm = 171.1355, GNorm = 0.1523, lr_0 = 1.7588e-04
Loss = 1.8496e-03, PNorm = 171.1376, GNorm = 0.1593, lr_0 = 1.7576e-04
Loss = 2.2631e-03, PNorm = 171.1394, GNorm = 0.0572, lr_0 = 1.7564e-04
Loss = 1.5779e-03, PNorm = 171.1419, GNorm = 0.0974, lr_0 = 1.7552e-04
Loss = 2.8828e-03, PNorm = 171.1437, GNorm = 0.2280, lr_0 = 1.7540e-04
Loss = 1.8227e-03, PNorm = 171.1464, GNorm = 0.0973, lr_0 = 1.7528e-04
Loss = 2.3094e-03, PNorm = 171.1500, GNorm = 0.2221, lr_0 = 1.7516e-04
Loss = 1.5841e-03, PNorm = 171.1522, GNorm = 0.1979, lr_0 = 1.7504e-04
Loss = 2.0793e-03, PNorm = 171.1543, GNorm = 0.0927, lr_0 = 1.7492e-04
Loss = 2.9112e-03, PNorm = 171.1572, GNorm = 0.0678, lr_0 = 1.7480e-04
Loss = 3.1225e-03, PNorm = 171.1595, GNorm = 0.0442, lr_0 = 1.7468e-04
Loss = 3.7250e-03, PNorm = 171.1644, GNorm = 0.2152, lr_0 = 1.7456e-04
Loss = 1.6820e-03, PNorm = 171.1666, GNorm = 0.1471, lr_0 = 1.7444e-04
Loss = 1.5916e-03, PNorm = 171.1685, GNorm = 0.3058, lr_0 = 1.7432e-04
Loss = 3.8004e-03, PNorm = 171.1708, GNorm = 0.3583, lr_0 = 1.7420e-04
Loss = 2.5718e-03, PNorm = 171.1728, GNorm = 0.1324, lr_0 = 1.7408e-04
Loss = 1.4500e-03, PNorm = 171.1746, GNorm = 0.0611, lr_0 = 1.7396e-04
Loss = 1.8874e-03, PNorm = 171.1759, GNorm = 0.2828, lr_0 = 1.7384e-04
Loss = 1.9814e-03, PNorm = 171.1780, GNorm = 0.2146, lr_0 = 1.7372e-04
Loss = 2.2114e-03, PNorm = 171.1811, GNorm = 0.1243, lr_0 = 1.7360e-04
Loss = 2.3435e-03, PNorm = 171.1840, GNorm = 0.1465, lr_0 = 1.7348e-04
Loss = 1.5412e-03, PNorm = 171.1855, GNorm = 0.1851, lr_0 = 1.7336e-04
Loss = 3.1671e-03, PNorm = 171.1877, GNorm = 0.0330, lr_0 = 1.7325e-04
Loss = 2.3443e-03, PNorm = 171.1899, GNorm = 0.0487, lr_0 = 1.7313e-04
Loss = 2.4671e-03, PNorm = 171.1936, GNorm = 0.1480, lr_0 = 1.7301e-04
Loss = 1.6647e-03, PNorm = 171.1964, GNorm = 0.3427, lr_0 = 1.7289e-04
Loss = 1.9098e-03, PNorm = 171.1982, GNorm = 0.1075, lr_0 = 1.7277e-04
Loss = 2.9801e-03, PNorm = 171.1991, GNorm = 0.1422, lr_0 = 1.7265e-04
Loss = 1.3742e-03, PNorm = 171.2031, GNorm = 0.1352, lr_0 = 1.7253e-04
Loss = 2.3804e-03, PNorm = 171.2062, GNorm = 0.1051, lr_0 = 1.7242e-04
Loss = 1.5696e-03, PNorm = 171.2107, GNorm = 0.0508, lr_0 = 1.7230e-04
Loss = 2.2995e-03, PNorm = 171.2144, GNorm = 0.1264, lr_0 = 1.7218e-04
Loss = 3.2413e-03, PNorm = 171.2188, GNorm = 0.0958, lr_0 = 1.7206e-04
Loss = 2.4301e-03, PNorm = 171.2233, GNorm = 0.1934, lr_0 = 1.7194e-04
Loss = 1.7702e-03, PNorm = 171.2262, GNorm = 0.2311, lr_0 = 1.7183e-04
Loss = 1.2419e-03, PNorm = 171.2298, GNorm = 0.1347, lr_0 = 1.7171e-04
Loss = 1.3996e-03, PNorm = 171.2327, GNorm = 0.2040, lr_0 = 1.7159e-04
Loss = 1.5861e-03, PNorm = 171.2350, GNorm = 0.0690, lr_0 = 1.7147e-04
Loss = 2.6818e-03, PNorm = 171.2370, GNorm = 0.0959, lr_0 = 1.7136e-04
Loss = 1.9904e-03, PNorm = 171.2407, GNorm = 0.1988, lr_0 = 1.7124e-04
Loss = 2.2701e-03, PNorm = 171.2457, GNorm = 0.1579, lr_0 = 1.7112e-04
Loss = 2.5353e-03, PNorm = 171.2502, GNorm = 0.1296, lr_0 = 1.7100e-04
Loss = 1.4586e-03, PNorm = 171.2536, GNorm = 0.1260, lr_0 = 1.7089e-04
Loss = 1.3379e-03, PNorm = 171.2545, GNorm = 0.0370, lr_0 = 1.7077e-04
Loss = 1.4658e-03, PNorm = 171.2571, GNorm = 0.0521, lr_0 = 1.7065e-04
Loss = 1.7807e-03, PNorm = 171.2600, GNorm = 0.1636, lr_0 = 1.7054e-04
Loss = 1.7241e-03, PNorm = 171.2639, GNorm = 0.1424, lr_0 = 1.7042e-04
Loss = 2.3721e-03, PNorm = 171.2681, GNorm = 0.2603, lr_0 = 1.7030e-04
Loss = 2.1411e-03, PNorm = 171.2682, GNorm = 0.1523, lr_0 = 1.7019e-04
Loss = 1.7936e-03, PNorm = 171.2710, GNorm = 0.1947, lr_0 = 1.7007e-04
Loss = 1.4584e-03, PNorm = 171.2753, GNorm = 0.0697, lr_0 = 1.6995e-04
Loss = 1.5128e-03, PNorm = 171.2799, GNorm = 0.0519, lr_0 = 1.6984e-04
Loss = 1.4477e-03, PNorm = 171.2816, GNorm = 0.0890, lr_0 = 1.6972e-04
Loss = 1.3395e-03, PNorm = 171.2833, GNorm = 0.1422, lr_0 = 1.6960e-04
Loss = 2.1330e-03, PNorm = 171.2853, GNorm = 0.1145, lr_0 = 1.6949e-04
Loss = 1.3154e-03, PNorm = 171.2880, GNorm = 0.0896, lr_0 = 1.6937e-04
Loss = 3.3648e-03, PNorm = 171.2920, GNorm = 0.2034, lr_0 = 1.6926e-04
Loss = 1.4428e-03, PNorm = 171.2958, GNorm = 0.0533, lr_0 = 1.6914e-04
Loss = 2.6608e-03, PNorm = 171.2989, GNorm = 0.1596, lr_0 = 1.6902e-04
Loss = 2.1456e-03, PNorm = 171.3009, GNorm = 0.0838, lr_0 = 1.6891e-04
Loss = 1.2890e-03, PNorm = 171.3027, GNorm = 0.0568, lr_0 = 1.6879e-04
Loss = 2.6810e-03, PNorm = 171.3066, GNorm = 0.3354, lr_0 = 1.6868e-04
Loss = 2.1864e-03, PNorm = 171.3112, GNorm = 0.0885, lr_0 = 1.6856e-04
Loss = 1.6088e-03, PNorm = 171.3160, GNorm = 0.2531, lr_0 = 1.6845e-04
Loss = 1.9045e-03, PNorm = 171.3176, GNorm = 0.1114, lr_0 = 1.6833e-04
Loss = 1.7185e-03, PNorm = 171.3194, GNorm = 0.1913, lr_0 = 1.6821e-04
Loss = 1.2939e-03, PNorm = 171.3212, GNorm = 0.1544, lr_0 = 1.6810e-04
Loss = 2.8226e-03, PNorm = 171.3221, GNorm = 0.2683, lr_0 = 1.6798e-04
Loss = 2.9376e-03, PNorm = 171.3257, GNorm = 0.0616, lr_0 = 1.6787e-04
Loss = 2.0259e-03, PNorm = 171.3291, GNorm = 0.1455, lr_0 = 1.6775e-04
Loss = 3.3888e-03, PNorm = 171.3320, GNorm = 0.0620, lr_0 = 1.6764e-04
Loss = 1.7619e-03, PNorm = 171.3329, GNorm = 0.0701, lr_0 = 1.6752e-04
Loss = 2.1321e-03, PNorm = 171.3347, GNorm = 0.0360, lr_0 = 1.6741e-04
Loss = 1.5757e-03, PNorm = 171.3363, GNorm = 0.2778, lr_0 = 1.6729e-04
Loss = 2.1094e-03, PNorm = 171.3416, GNorm = 0.1758, lr_0 = 1.6718e-04
Loss = 2.0336e-03, PNorm = 171.3464, GNorm = 0.1965, lr_0 = 1.6707e-04
Loss = 2.1703e-03, PNorm = 171.3519, GNorm = 0.0385, lr_0 = 1.6695e-04
Loss = 2.0434e-03, PNorm = 171.3548, GNorm = 0.2539, lr_0 = 1.6684e-04
Loss = 3.2532e-03, PNorm = 171.3580, GNorm = 0.1724, lr_0 = 1.6672e-04
Loss = 3.4553e-03, PNorm = 171.3598, GNorm = 0.0734, lr_0 = 1.6661e-04
Loss = 2.1633e-03, PNorm = 171.3627, GNorm = 0.1462, lr_0 = 1.6649e-04
Loss = 1.5984e-03, PNorm = 171.3657, GNorm = 0.2213, lr_0 = 1.6638e-04
Loss = 9.6660e-04, PNorm = 171.3678, GNorm = 0.1538, lr_0 = 1.6627e-04
Loss = 7.8529e-03, PNorm = 171.3712, GNorm = 0.1579, lr_0 = 1.6615e-04
Loss = 1.2660e-03, PNorm = 171.3751, GNorm = 0.1072, lr_0 = 1.6604e-04
Loss = 1.7281e-03, PNorm = 171.3777, GNorm = 0.0556, lr_0 = 1.6592e-04
Loss = 1.4276e-03, PNorm = 171.3801, GNorm = 0.2483, lr_0 = 1.6581e-04
Loss = 1.5015e-03, PNorm = 171.3855, GNorm = 0.2036, lr_0 = 1.6570e-04
Loss = 1.5182e-03, PNorm = 171.3905, GNorm = 0.0553, lr_0 = 1.6558e-04
Loss = 3.7291e-03, PNorm = 171.3931, GNorm = 0.1243, lr_0 = 1.6547e-04
Loss = 3.0640e-03, PNorm = 171.3963, GNorm = 0.1880, lr_0 = 1.6536e-04
Loss = 1.8154e-03, PNorm = 171.3998, GNorm = 0.1230, lr_0 = 1.6524e-04
Loss = 1.5538e-03, PNorm = 171.4047, GNorm = 0.0863, lr_0 = 1.6513e-04
Loss = 1.8635e-03, PNorm = 171.4073, GNorm = 0.0485, lr_0 = 1.6502e-04
Loss = 2.1195e-03, PNorm = 171.4089, GNorm = 0.1685, lr_0 = 1.6490e-04
Loss = 1.5348e-03, PNorm = 171.4121, GNorm = 0.0584, lr_0 = 1.6479e-04
Loss = 1.3257e-03, PNorm = 171.4138, GNorm = 0.0896, lr_0 = 1.6468e-04
Loss = 3.7575e-03, PNorm = 171.4151, GNorm = 0.0949, lr_0 = 1.6457e-04
Loss = 2.5286e-03, PNorm = 171.4166, GNorm = 0.1712, lr_0 = 1.6445e-04
Loss = 4.2186e-03, PNorm = 171.4206, GNorm = 0.1723, lr_0 = 1.6434e-04
Loss = 2.2206e-03, PNorm = 171.4251, GNorm = 0.1097, lr_0 = 1.6423e-04
Loss = 1.9311e-03, PNorm = 171.4280, GNorm = 0.0658, lr_0 = 1.6412e-04
Loss = 1.9401e-03, PNorm = 171.4320, GNorm = 0.0734, lr_0 = 1.6400e-04
Loss = 1.2881e-03, PNorm = 171.4362, GNorm = 0.0801, lr_0 = 1.6389e-04
Loss = 2.0831e-03, PNorm = 171.4397, GNorm = 0.1299, lr_0 = 1.6378e-04
Validation mae = 0.278178
Epoch 24
Loss = 1.4685e-03, PNorm = 171.4417, GNorm = 0.0520, lr_0 = 1.6367e-04
Loss = 1.4978e-03, PNorm = 171.4434, GNorm = 0.0906, lr_0 = 1.6355e-04
Loss = 1.1449e-03, PNorm = 171.4459, GNorm = 0.1441, lr_0 = 1.6344e-04
Loss = 2.9988e-03, PNorm = 171.4489, GNorm = 0.1477, lr_0 = 1.6333e-04
Loss = 1.8708e-03, PNorm = 171.4521, GNorm = 0.0972, lr_0 = 1.6322e-04
Loss = 1.3790e-03, PNorm = 171.4537, GNorm = 0.1043, lr_0 = 1.6311e-04
Loss = 1.9640e-03, PNorm = 171.4549, GNorm = 0.0739, lr_0 = 1.6299e-04
Loss = 1.2686e-03, PNorm = 171.4564, GNorm = 0.1535, lr_0 = 1.6288e-04
Loss = 1.3646e-03, PNorm = 171.4580, GNorm = 0.0983, lr_0 = 1.6277e-04
Loss = 3.4028e-03, PNorm = 171.4603, GNorm = 0.1368, lr_0 = 1.6266e-04
Loss = 1.3294e-03, PNorm = 171.4629, GNorm = 0.1076, lr_0 = 1.6255e-04
Loss = 2.4563e-03, PNorm = 171.4655, GNorm = 0.1178, lr_0 = 1.6244e-04
Loss = 1.1076e-03, PNorm = 171.4676, GNorm = 0.0654, lr_0 = 1.6233e-04
Loss = 2.2915e-03, PNorm = 171.4693, GNorm = 0.2662, lr_0 = 1.6221e-04
Loss = 1.9231e-03, PNorm = 171.4725, GNorm = 0.0307, lr_0 = 1.6210e-04
Loss = 1.3077e-03, PNorm = 171.4770, GNorm = 0.1936, lr_0 = 1.6199e-04
Loss = 2.3664e-03, PNorm = 171.4832, GNorm = 0.0908, lr_0 = 1.6188e-04
Loss = 1.4164e-03, PNorm = 171.4868, GNorm = 0.1232, lr_0 = 1.6177e-04
Loss = 1.7076e-03, PNorm = 171.4872, GNorm = 0.0954, lr_0 = 1.6166e-04
Loss = 1.1221e-03, PNorm = 171.4878, GNorm = 0.0917, lr_0 = 1.6155e-04
Loss = 2.0574e-03, PNorm = 171.4881, GNorm = 0.1161, lr_0 = 1.6144e-04
Loss = 1.9548e-03, PNorm = 171.4904, GNorm = 0.0958, lr_0 = 1.6133e-04
Loss = 1.9276e-03, PNorm = 171.4925, GNorm = 0.0728, lr_0 = 1.6122e-04
Loss = 2.4625e-03, PNorm = 171.4958, GNorm = 0.0440, lr_0 = 1.6111e-04
Loss = 1.9108e-03, PNorm = 171.4986, GNorm = 0.0823, lr_0 = 1.6100e-04
Loss = 2.0459e-03, PNorm = 171.5016, GNorm = 0.1375, lr_0 = 1.6089e-04
Loss = 1.3362e-03, PNorm = 171.5057, GNorm = 0.1032, lr_0 = 1.6078e-04
Loss = 2.5459e-03, PNorm = 171.5094, GNorm = 0.1194, lr_0 = 1.6067e-04
Loss = 3.0906e-03, PNorm = 171.5134, GNorm = 0.0560, lr_0 = 1.6056e-04
Loss = 1.7328e-03, PNorm = 171.5181, GNorm = 0.2840, lr_0 = 1.6045e-04
Loss = 1.6053e-03, PNorm = 171.5222, GNorm = 0.1191, lr_0 = 1.6034e-04
Loss = 1.8022e-03, PNorm = 171.5252, GNorm = 0.1397, lr_0 = 1.6023e-04
Loss = 1.5041e-03, PNorm = 171.5280, GNorm = 0.0652, lr_0 = 1.6012e-04
Loss = 1.4020e-03, PNorm = 171.5305, GNorm = 0.1081, lr_0 = 1.6001e-04
Loss = 2.0977e-03, PNorm = 171.5331, GNorm = 0.0352, lr_0 = 1.5990e-04
Loss = 1.4831e-03, PNorm = 171.5362, GNorm = 0.0653, lr_0 = 1.5979e-04
Loss = 1.0981e-03, PNorm = 171.5389, GNorm = 0.0810, lr_0 = 1.5968e-04
Loss = 1.7817e-03, PNorm = 171.5407, GNorm = 0.1914, lr_0 = 1.5957e-04
Loss = 1.2964e-03, PNorm = 171.5442, GNorm = 0.2299, lr_0 = 1.5946e-04
Loss = 3.1229e-03, PNorm = 171.5462, GNorm = 0.0947, lr_0 = 1.5935e-04
Loss = 1.2105e-03, PNorm = 171.5485, GNorm = 0.1359, lr_0 = 1.5924e-04
Loss = 1.1889e-03, PNorm = 171.5509, GNorm = 0.1445, lr_0 = 1.5913e-04
Loss = 1.4650e-03, PNorm = 171.5519, GNorm = 0.1924, lr_0 = 1.5902e-04
Loss = 1.9238e-03, PNorm = 171.5549, GNorm = 0.0994, lr_0 = 1.5891e-04
Loss = 1.9079e-03, PNorm = 171.5556, GNorm = 0.2013, lr_0 = 1.5880e-04
Loss = 2.7096e-03, PNorm = 171.5563, GNorm = 0.1031, lr_0 = 1.5870e-04
Loss = 1.1806e-03, PNorm = 171.5566, GNorm = 0.0960, lr_0 = 1.5859e-04
Loss = 1.9429e-03, PNorm = 171.5594, GNorm = 0.1128, lr_0 = 1.5848e-04
Loss = 2.2573e-03, PNorm = 171.5629, GNorm = 0.2540, lr_0 = 1.5837e-04
Loss = 1.2771e-03, PNorm = 171.5650, GNorm = 0.1232, lr_0 = 1.5826e-04
Loss = 1.5089e-03, PNorm = 171.5658, GNorm = 0.3605, lr_0 = 1.5815e-04
Loss = 1.8669e-03, PNorm = 171.5693, GNorm = 0.0816, lr_0 = 1.5804e-04
Loss = 2.2981e-03, PNorm = 171.5739, GNorm = 0.0961, lr_0 = 1.5794e-04
Loss = 1.6744e-03, PNorm = 171.5767, GNorm = 0.1245, lr_0 = 1.5783e-04
Loss = 1.2397e-03, PNorm = 171.5794, GNorm = 0.0922, lr_0 = 1.5772e-04
Loss = 4.1887e-03, PNorm = 171.5806, GNorm = 0.0642, lr_0 = 1.5761e-04
Loss = 2.1858e-03, PNorm = 171.5835, GNorm = 0.3160, lr_0 = 1.5750e-04
Loss = 1.2448e-03, PNorm = 171.5846, GNorm = 0.0808, lr_0 = 1.5740e-04
Loss = 1.1576e-03, PNorm = 171.5875, GNorm = 0.1660, lr_0 = 1.5729e-04
Loss = 1.1148e-03, PNorm = 171.5901, GNorm = 0.0672, lr_0 = 1.5718e-04
Loss = 2.8330e-03, PNorm = 171.5923, GNorm = 0.0990, lr_0 = 1.5707e-04
Loss = 1.3853e-03, PNorm = 171.5956, GNorm = 0.1225, lr_0 = 1.5697e-04
Loss = 1.1929e-03, PNorm = 171.5986, GNorm = 0.1734, lr_0 = 1.5686e-04
Loss = 2.0616e-03, PNorm = 171.6024, GNorm = 0.0674, lr_0 = 1.5675e-04
Loss = 9.6377e-04, PNorm = 171.6062, GNorm = 0.0684, lr_0 = 1.5664e-04
Loss = 1.6956e-03, PNorm = 171.6076, GNorm = 0.3525, lr_0 = 1.5654e-04
Loss = 2.9267e-03, PNorm = 171.6098, GNorm = 0.2072, lr_0 = 1.5643e-04
Loss = 1.8547e-03, PNorm = 171.6132, GNorm = 0.0912, lr_0 = 1.5632e-04
Loss = 1.7567e-03, PNorm = 171.6154, GNorm = 0.0418, lr_0 = 1.5621e-04
Loss = 2.4303e-03, PNorm = 171.6174, GNorm = 0.1042, lr_0 = 1.5611e-04
Loss = 2.2517e-03, PNorm = 171.6209, GNorm = 0.1950, lr_0 = 1.5600e-04
Loss = 2.1992e-03, PNorm = 171.6229, GNorm = 0.1453, lr_0 = 1.5589e-04
Loss = 1.2399e-03, PNorm = 171.6247, GNorm = 0.1103, lr_0 = 1.5579e-04
Loss = 2.9658e-03, PNorm = 171.6272, GNorm = 0.1979, lr_0 = 1.5568e-04
Loss = 1.2820e-03, PNorm = 171.6295, GNorm = 0.0515, lr_0 = 1.5557e-04
Loss = 2.2375e-03, PNorm = 171.6316, GNorm = 0.1038, lr_0 = 1.5547e-04
Loss = 1.4923e-03, PNorm = 171.6337, GNorm = 0.0744, lr_0 = 1.5536e-04
Loss = 3.2609e-03, PNorm = 171.6353, GNorm = 0.1666, lr_0 = 1.5525e-04
Loss = 2.5168e-03, PNorm = 171.6386, GNorm = 0.0492, lr_0 = 1.5515e-04
Loss = 2.9315e-03, PNorm = 171.6411, GNorm = 0.1498, lr_0 = 1.5504e-04
Loss = 2.5550e-03, PNorm = 171.6443, GNorm = 0.0598, lr_0 = 1.5493e-04
Loss = 2.5341e-03, PNorm = 171.6475, GNorm = 0.1075, lr_0 = 1.5483e-04
Loss = 9.8810e-04, PNorm = 171.6489, GNorm = 0.1088, lr_0 = 1.5472e-04
Loss = 1.9603e-03, PNorm = 171.6516, GNorm = 0.1526, lr_0 = 1.5462e-04
Loss = 2.1063e-03, PNorm = 171.6540, GNorm = 0.2696, lr_0 = 1.5451e-04
Loss = 2.7002e-03, PNorm = 171.6565, GNorm = 0.1457, lr_0 = 1.5440e-04
Loss = 2.9551e-03, PNorm = 171.6584, GNorm = 0.1137, lr_0 = 1.5430e-04
Loss = 1.7216e-03, PNorm = 171.6634, GNorm = 0.1743, lr_0 = 1.5419e-04
Loss = 1.3159e-03, PNorm = 171.6651, GNorm = 0.0818, lr_0 = 1.5409e-04
Loss = 1.8943e-03, PNorm = 171.6678, GNorm = 0.2134, lr_0 = 1.5398e-04
Loss = 1.8456e-03, PNorm = 171.6682, GNorm = 0.1077, lr_0 = 1.5388e-04
Loss = 2.4956e-03, PNorm = 171.6710, GNorm = 0.0409, lr_0 = 1.5377e-04
Loss = 1.8465e-03, PNorm = 171.6747, GNorm = 0.0762, lr_0 = 1.5367e-04
Loss = 1.7965e-03, PNorm = 171.6772, GNorm = 0.1826, lr_0 = 1.5356e-04
Loss = 2.6355e-03, PNorm = 171.6779, GNorm = 0.0761, lr_0 = 1.5346e-04
Loss = 1.0754e-03, PNorm = 171.6806, GNorm = 0.0705, lr_0 = 1.5335e-04
Loss = 1.3993e-03, PNorm = 171.6837, GNorm = 0.1818, lr_0 = 1.5325e-04
Loss = 1.5043e-03, PNorm = 171.6870, GNorm = 0.0935, lr_0 = 1.5314e-04
Loss = 1.6745e-03, PNorm = 171.6890, GNorm = 0.1050, lr_0 = 1.5304e-04
Loss = 1.1898e-03, PNorm = 171.6914, GNorm = 0.0791, lr_0 = 1.5293e-04
Loss = 1.1411e-03, PNorm = 171.6936, GNorm = 0.0738, lr_0 = 1.5283e-04
Loss = 1.4001e-03, PNorm = 171.6976, GNorm = 0.0978, lr_0 = 1.5272e-04
Loss = 1.2585e-03, PNorm = 171.7026, GNorm = 0.0711, lr_0 = 1.5262e-04
Loss = 2.4432e-03, PNorm = 171.7072, GNorm = 0.1306, lr_0 = 1.5251e-04
Loss = 2.6009e-03, PNorm = 171.7105, GNorm = 0.0730, lr_0 = 1.5241e-04
Loss = 2.8086e-03, PNorm = 171.7126, GNorm = 0.1283, lr_0 = 1.5230e-04
Loss = 4.2950e-03, PNorm = 171.7153, GNorm = 0.2993, lr_0 = 1.5220e-04
Loss = 1.2663e-03, PNorm = 171.7156, GNorm = 0.0505, lr_0 = 1.5209e-04
Loss = 1.2751e-03, PNorm = 171.7193, GNorm = 0.1333, lr_0 = 1.5199e-04
Loss = 2.2392e-03, PNorm = 171.7222, GNorm = 0.0884, lr_0 = 1.5189e-04
Loss = 1.2525e-03, PNorm = 171.7250, GNorm = 0.0697, lr_0 = 1.5178e-04
Loss = 2.0764e-03, PNorm = 171.7271, GNorm = 0.1180, lr_0 = 1.5168e-04
Loss = 1.7333e-03, PNorm = 171.7296, GNorm = 0.1479, lr_0 = 1.5157e-04
Loss = 3.3702e-03, PNorm = 171.7317, GNorm = 0.0489, lr_0 = 1.5147e-04
Loss = 1.7582e-03, PNorm = 171.7318, GNorm = 0.1021, lr_0 = 1.5137e-04
Loss = 2.1566e-03, PNorm = 171.7348, GNorm = 0.1094, lr_0 = 1.5126e-04
Loss = 2.2517e-03, PNorm = 171.7366, GNorm = 0.1067, lr_0 = 1.5116e-04
Loss = 2.0743e-03, PNorm = 171.7417, GNorm = 0.0708, lr_0 = 1.5106e-04
Loss = 1.6919e-03, PNorm = 171.7463, GNorm = 0.1580, lr_0 = 1.5095e-04
Loss = 2.6537e-03, PNorm = 171.7523, GNorm = 0.1882, lr_0 = 1.5085e-04
Validation mae = 0.277855
Epoch 25
Loss = 1.1187e-03, PNorm = 171.7555, GNorm = 0.1257, lr_0 = 1.5075e-04
Loss = 1.8285e-03, PNorm = 171.7583, GNorm = 0.1470, lr_0 = 1.5064e-04
Loss = 9.6969e-04, PNorm = 171.7590, GNorm = 0.1157, lr_0 = 1.5054e-04
Loss = 1.3221e-03, PNorm = 171.7593, GNorm = 0.0834, lr_0 = 1.5044e-04
Loss = 9.3899e-04, PNorm = 171.7594, GNorm = 0.0708, lr_0 = 1.5033e-04
Loss = 2.6085e-03, PNorm = 171.7611, GNorm = 0.1110, lr_0 = 1.5023e-04
Loss = 1.8593e-03, PNorm = 171.7623, GNorm = 0.1131, lr_0 = 1.5013e-04
Loss = 1.2135e-03, PNorm = 171.7646, GNorm = 0.0360, lr_0 = 1.5002e-04
Loss = 1.6362e-03, PNorm = 171.7679, GNorm = 0.1302, lr_0 = 1.4992e-04
Loss = 1.0420e-03, PNorm = 171.7699, GNorm = 0.1161, lr_0 = 1.4982e-04
Loss = 1.6797e-03, PNorm = 171.7718, GNorm = 0.0772, lr_0 = 1.4972e-04
Loss = 1.1785e-03, PNorm = 171.7736, GNorm = 0.0666, lr_0 = 1.4961e-04
Loss = 1.1836e-03, PNorm = 171.7754, GNorm = 0.1690, lr_0 = 1.4951e-04
Loss = 9.2660e-04, PNorm = 171.7769, GNorm = 0.1001, lr_0 = 1.4941e-04
Loss = 2.3319e-03, PNorm = 171.7780, GNorm = 0.0678, lr_0 = 1.4931e-04
Loss = 1.2314e-03, PNorm = 171.7792, GNorm = 0.1081, lr_0 = 1.4920e-04
Loss = 3.1840e-03, PNorm = 171.7807, GNorm = 0.2099, lr_0 = 1.4910e-04
Loss = 1.8379e-03, PNorm = 171.7832, GNorm = 0.0451, lr_0 = 1.4900e-04
Loss = 1.3871e-03, PNorm = 171.7856, GNorm = 0.1030, lr_0 = 1.4890e-04
Loss = 1.0390e-03, PNorm = 171.7877, GNorm = 0.1284, lr_0 = 1.4880e-04
Loss = 1.1401e-03, PNorm = 171.7898, GNorm = 0.1217, lr_0 = 1.4869e-04
Loss = 1.1196e-03, PNorm = 171.7935, GNorm = 0.1224, lr_0 = 1.4859e-04
Loss = 3.9141e-03, PNorm = 171.7938, GNorm = 0.0872, lr_0 = 1.4849e-04
Loss = 1.3271e-03, PNorm = 171.7941, GNorm = 0.0795, lr_0 = 1.4839e-04
Loss = 1.1387e-03, PNorm = 171.7932, GNorm = 0.0662, lr_0 = 1.4829e-04
Loss = 1.0994e-03, PNorm = 171.7950, GNorm = 0.1697, lr_0 = 1.4818e-04
Loss = 1.7524e-03, PNorm = 171.7971, GNorm = 0.0865, lr_0 = 1.4808e-04
Loss = 1.2171e-03, PNorm = 171.8001, GNorm = 0.1434, lr_0 = 1.4798e-04
Loss = 2.7075e-03, PNorm = 171.8036, GNorm = 0.0801, lr_0 = 1.4788e-04
Loss = 1.3112e-03, PNorm = 171.8066, GNorm = 0.0606, lr_0 = 1.4778e-04
Loss = 1.5802e-03, PNorm = 171.8102, GNorm = 0.0563, lr_0 = 1.4768e-04
Loss = 1.2032e-03, PNorm = 171.8141, GNorm = 0.0792, lr_0 = 1.4758e-04
Loss = 1.3680e-03, PNorm = 171.8163, GNorm = 0.0736, lr_0 = 1.4748e-04
Loss = 2.0058e-03, PNorm = 171.8171, GNorm = 0.0751, lr_0 = 1.4737e-04
Loss = 1.4861e-03, PNorm = 171.8179, GNorm = 0.0954, lr_0 = 1.4727e-04
Loss = 1.1884e-03, PNorm = 171.8201, GNorm = 0.1763, lr_0 = 1.4717e-04
Loss = 1.6417e-03, PNorm = 171.8246, GNorm = 0.0379, lr_0 = 1.4707e-04
Loss = 1.6512e-03, PNorm = 171.8282, GNorm = 0.0549, lr_0 = 1.4697e-04
Loss = 1.9835e-03, PNorm = 171.8296, GNorm = 0.0966, lr_0 = 1.4687e-04
Loss = 9.9349e-04, PNorm = 171.8317, GNorm = 0.1241, lr_0 = 1.4677e-04
Loss = 1.1630e-03, PNorm = 171.8336, GNorm = 0.1897, lr_0 = 1.4667e-04
Loss = 8.7780e-04, PNorm = 171.8349, GNorm = 0.1036, lr_0 = 1.4657e-04
Loss = 1.6799e-03, PNorm = 171.8372, GNorm = 0.1711, lr_0 = 1.4647e-04
Loss = 1.2838e-03, PNorm = 171.8402, GNorm = 0.1709, lr_0 = 1.4637e-04
Loss = 1.3738e-03, PNorm = 171.8430, GNorm = 0.1357, lr_0 = 1.4627e-04
Loss = 2.3659e-03, PNorm = 171.8440, GNorm = 0.1657, lr_0 = 1.4617e-04
Loss = 1.2788e-03, PNorm = 171.8447, GNorm = 0.1077, lr_0 = 1.4607e-04
Loss = 1.0767e-03, PNorm = 171.8453, GNorm = 0.2492, lr_0 = 1.4597e-04
Loss = 1.7464e-03, PNorm = 171.8474, GNorm = 0.0955, lr_0 = 1.4587e-04
Loss = 9.1403e-04, PNorm = 171.8491, GNorm = 0.1315, lr_0 = 1.4577e-04
Loss = 2.3280e-03, PNorm = 171.8503, GNorm = 0.1632, lr_0 = 1.4567e-04
Loss = 1.4486e-03, PNorm = 171.8518, GNorm = 0.0803, lr_0 = 1.4557e-04
Loss = 4.9706e-03, PNorm = 171.8529, GNorm = 0.2734, lr_0 = 1.4547e-04
Loss = 1.0504e-03, PNorm = 171.8550, GNorm = 0.0579, lr_0 = 1.4537e-04
Loss = 2.8702e-03, PNorm = 171.8556, GNorm = 0.1849, lr_0 = 1.4527e-04
Loss = 2.5794e-03, PNorm = 171.8548, GNorm = 0.1590, lr_0 = 1.4517e-04
Loss = 4.2646e-03, PNorm = 171.8561, GNorm = 0.2643, lr_0 = 1.4507e-04
Loss = 2.2111e-03, PNorm = 171.8575, GNorm = 0.1992, lr_0 = 1.4497e-04
Loss = 1.7274e-03, PNorm = 171.8601, GNorm = 0.0292, lr_0 = 1.4487e-04
Loss = 1.0025e-03, PNorm = 171.8631, GNorm = 0.0992, lr_0 = 1.4477e-04
Loss = 1.2215e-03, PNorm = 171.8672, GNorm = 0.1680, lr_0 = 1.4467e-04
Loss = 2.0327e-03, PNorm = 171.8694, GNorm = 0.0650, lr_0 = 1.4457e-04
Loss = 1.1301e-03, PNorm = 171.8710, GNorm = 0.0450, lr_0 = 1.4447e-04
Loss = 1.2937e-03, PNorm = 171.8736, GNorm = 0.0664, lr_0 = 1.4438e-04
Loss = 1.6192e-03, PNorm = 171.8767, GNorm = 0.1436, lr_0 = 1.4428e-04
Loss = 2.1349e-03, PNorm = 171.8786, GNorm = 0.0545, lr_0 = 1.4418e-04
Loss = 1.8586e-03, PNorm = 171.8799, GNorm = 0.1084, lr_0 = 1.4408e-04
Loss = 2.4157e-03, PNorm = 171.8824, GNorm = 0.0816, lr_0 = 1.4398e-04
Loss = 2.4101e-03, PNorm = 171.8859, GNorm = 0.0861, lr_0 = 1.4388e-04
Loss = 1.4134e-03, PNorm = 171.8881, GNorm = 0.0389, lr_0 = 1.4378e-04
Loss = 2.1313e-03, PNorm = 171.8899, GNorm = 0.1119, lr_0 = 1.4368e-04
Loss = 2.5547e-03, PNorm = 171.8915, GNorm = 0.1039, lr_0 = 1.4359e-04
Loss = 1.0075e-03, PNorm = 171.8942, GNorm = 0.1605, lr_0 = 1.4349e-04
Loss = 2.0864e-03, PNorm = 171.8958, GNorm = 0.1877, lr_0 = 1.4339e-04
Loss = 1.2794e-03, PNorm = 171.9002, GNorm = 0.0901, lr_0 = 1.4329e-04
Loss = 2.7163e-03, PNorm = 171.9022, GNorm = 0.1022, lr_0 = 1.4319e-04
Loss = 1.8147e-03, PNorm = 171.9057, GNorm = 0.0369, lr_0 = 1.4310e-04
Loss = 1.6000e-03, PNorm = 171.9082, GNorm = 0.0485, lr_0 = 1.4300e-04
Loss = 1.7094e-03, PNorm = 171.9124, GNorm = 0.2376, lr_0 = 1.4290e-04
Loss = 2.0832e-03, PNorm = 171.9145, GNorm = 0.1017, lr_0 = 1.4280e-04
Loss = 1.2473e-03, PNorm = 171.9167, GNorm = 0.0715, lr_0 = 1.4270e-04
Loss = 9.4490e-04, PNorm = 171.9185, GNorm = 0.1875, lr_0 = 1.4261e-04
Loss = 1.0758e-03, PNorm = 171.9218, GNorm = 0.0428, lr_0 = 1.4251e-04
Loss = 3.1897e-03, PNorm = 171.9242, GNorm = 0.1005, lr_0 = 1.4241e-04
Loss = 1.2928e-03, PNorm = 171.9259, GNorm = 0.1877, lr_0 = 1.4231e-04
Loss = 1.0312e-03, PNorm = 171.9286, GNorm = 0.2117, lr_0 = 1.4222e-04
Loss = 2.2059e-03, PNorm = 171.9309, GNorm = 0.1371, lr_0 = 1.4212e-04
Loss = 9.1909e-04, PNorm = 171.9337, GNorm = 0.0850, lr_0 = 1.4202e-04
Loss = 1.5842e-03, PNorm = 171.9356, GNorm = 0.0661, lr_0 = 1.4192e-04
Loss = 1.3235e-03, PNorm = 171.9382, GNorm = 0.2420, lr_0 = 1.4183e-04
Loss = 2.2610e-03, PNorm = 171.9400, GNorm = 0.1486, lr_0 = 1.4173e-04
Loss = 3.1438e-03, PNorm = 171.9414, GNorm = 0.1507, lr_0 = 1.4163e-04
Loss = 3.3253e-03, PNorm = 171.9442, GNorm = 0.1112, lr_0 = 1.4153e-04
Loss = 1.1924e-03, PNorm = 171.9476, GNorm = 0.0281, lr_0 = 1.4144e-04
Loss = 2.1216e-03, PNorm = 171.9505, GNorm = 0.1414, lr_0 = 1.4134e-04
Loss = 1.4925e-03, PNorm = 171.9547, GNorm = 0.2379, lr_0 = 1.4124e-04
Loss = 1.8015e-03, PNorm = 171.9576, GNorm = 0.0904, lr_0 = 1.4115e-04
Loss = 9.5391e-04, PNorm = 171.9590, GNorm = 0.2064, lr_0 = 1.4105e-04
Loss = 1.9821e-03, PNorm = 171.9606, GNorm = 0.3228, lr_0 = 1.4095e-04
Loss = 3.0777e-03, PNorm = 171.9642, GNorm = 0.1732, lr_0 = 1.4086e-04
Loss = 1.6112e-03, PNorm = 171.9670, GNorm = 0.2770, lr_0 = 1.4076e-04
Loss = 3.0287e-03, PNorm = 171.9699, GNorm = 0.3380, lr_0 = 1.4066e-04
Loss = 9.8597e-04, PNorm = 171.9707, GNorm = 0.1608, lr_0 = 1.4057e-04
Loss = 1.3158e-03, PNorm = 171.9731, GNorm = 0.1660, lr_0 = 1.4047e-04
Loss = 2.7107e-03, PNorm = 171.9780, GNorm = 0.0674, lr_0 = 1.4038e-04
Loss = 1.2906e-03, PNorm = 171.9811, GNorm = 0.1368, lr_0 = 1.4028e-04
Loss = 1.4104e-03, PNorm = 171.9840, GNorm = 0.1046, lr_0 = 1.4018e-04
Loss = 2.2978e-03, PNorm = 171.9857, GNorm = 0.2062, lr_0 = 1.4009e-04
Loss = 1.1328e-03, PNorm = 171.9875, GNorm = 0.2164, lr_0 = 1.3999e-04
Loss = 2.4477e-03, PNorm = 171.9894, GNorm = 0.1874, lr_0 = 1.3990e-04
Loss = 1.1120e-03, PNorm = 171.9914, GNorm = 0.0998, lr_0 = 1.3980e-04
Loss = 9.7953e-04, PNorm = 171.9941, GNorm = 0.1227, lr_0 = 1.3970e-04
Loss = 1.9883e-03, PNorm = 171.9955, GNorm = 0.0573, lr_0 = 1.3961e-04
Loss = 1.5785e-03, PNorm = 171.9975, GNorm = 0.1167, lr_0 = 1.3951e-04
Loss = 9.0806e-04, PNorm = 171.9988, GNorm = 0.1831, lr_0 = 1.3942e-04
Loss = 2.9341e-03, PNorm = 172.0020, GNorm = 0.5909, lr_0 = 1.3932e-04
Loss = 1.5508e-03, PNorm = 172.0047, GNorm = 0.1260, lr_0 = 1.3923e-04
Loss = 2.4773e-03, PNorm = 172.0079, GNorm = 0.1495, lr_0 = 1.3913e-04
Loss = 2.3027e-03, PNorm = 172.0092, GNorm = 0.1659, lr_0 = 1.3904e-04
Loss = 1.0502e-03, PNorm = 172.0113, GNorm = 0.0358, lr_0 = 1.3894e-04
Validation mae = 0.277787
Epoch 26
Loss = 3.6052e-03, PNorm = 172.0137, GNorm = 0.1282, lr_0 = 1.3884e-04
Loss = 1.8538e-03, PNorm = 172.0153, GNorm = 0.2615, lr_0 = 1.3875e-04
Loss = 9.4819e-04, PNorm = 172.0164, GNorm = 0.2010, lr_0 = 1.3865e-04
Loss = 9.8703e-04, PNorm = 172.0182, GNorm = 0.1635, lr_0 = 1.3856e-04
Loss = 9.2476e-04, PNorm = 172.0187, GNorm = 0.0607, lr_0 = 1.3846e-04
Loss = 1.0730e-03, PNorm = 172.0190, GNorm = 0.0485, lr_0 = 1.3837e-04
Loss = 1.2442e-03, PNorm = 172.0206, GNorm = 0.0914, lr_0 = 1.3828e-04
Loss = 7.4886e-04, PNorm = 172.0224, GNorm = 0.0806, lr_0 = 1.3818e-04
Loss = 1.4574e-03, PNorm = 172.0241, GNorm = 0.0657, lr_0 = 1.3809e-04
Loss = 8.3239e-04, PNorm = 172.0257, GNorm = 0.0651, lr_0 = 1.3799e-04
Loss = 1.2093e-03, PNorm = 172.0267, GNorm = 0.0869, lr_0 = 1.3790e-04
Loss = 1.2484e-03, PNorm = 172.0287, GNorm = 0.1145, lr_0 = 1.3780e-04
Loss = 1.4145e-03, PNorm = 172.0294, GNorm = 0.1151, lr_0 = 1.3771e-04
Loss = 2.4421e-03, PNorm = 172.0323, GNorm = 0.1434, lr_0 = 1.3761e-04
Loss = 1.0604e-03, PNorm = 172.0341, GNorm = 0.1714, lr_0 = 1.3752e-04
Loss = 1.6415e-03, PNorm = 172.0359, GNorm = 0.1338, lr_0 = 1.3742e-04
Loss = 1.3907e-03, PNorm = 172.0381, GNorm = 0.0999, lr_0 = 1.3733e-04
Loss = 1.7971e-03, PNorm = 172.0417, GNorm = 0.0447, lr_0 = 1.3724e-04
Loss = 1.4126e-03, PNorm = 172.0433, GNorm = 0.2968, lr_0 = 1.3714e-04
Loss = 8.5056e-04, PNorm = 172.0436, GNorm = 0.1700, lr_0 = 1.3705e-04
Loss = 9.9496e-04, PNorm = 172.0445, GNorm = 0.0411, lr_0 = 1.3695e-04
Loss = 9.6919e-04, PNorm = 172.0453, GNorm = 0.0293, lr_0 = 1.3686e-04
Loss = 9.8206e-04, PNorm = 172.0452, GNorm = 0.1352, lr_0 = 1.3677e-04
Loss = 1.8424e-03, PNorm = 172.0470, GNorm = 0.1699, lr_0 = 1.3667e-04
Loss = 9.9693e-04, PNorm = 172.0503, GNorm = 0.1212, lr_0 = 1.3658e-04
Loss = 2.3055e-03, PNorm = 172.0525, GNorm = 0.1305, lr_0 = 1.3649e-04
Loss = 9.8785e-04, PNorm = 172.0531, GNorm = 0.1875, lr_0 = 1.3639e-04
Loss = 1.4274e-03, PNorm = 172.0527, GNorm = 0.2087, lr_0 = 1.3630e-04
Loss = 9.0427e-04, PNorm = 172.0543, GNorm = 0.0373, lr_0 = 1.3621e-04
Loss = 2.6926e-03, PNorm = 172.0564, GNorm = 0.1488, lr_0 = 1.3611e-04
Loss = 9.4903e-04, PNorm = 172.0580, GNorm = 0.1038, lr_0 = 1.3602e-04
Loss = 1.0126e-03, PNorm = 172.0608, GNorm = 0.1805, lr_0 = 1.3593e-04
Loss = 9.4566e-04, PNorm = 172.0634, GNorm = 0.0351, lr_0 = 1.3583e-04
Loss = 1.4151e-03, PNorm = 172.0653, GNorm = 0.0795, lr_0 = 1.3574e-04
Loss = 2.3222e-03, PNorm = 172.0679, GNorm = 0.1586, lr_0 = 1.3565e-04
Loss = 8.5215e-04, PNorm = 172.0692, GNorm = 0.1121, lr_0 = 1.3555e-04
Loss = 9.8708e-04, PNorm = 172.0702, GNorm = 0.0508, lr_0 = 1.3546e-04
Loss = 2.5210e-03, PNorm = 172.0711, GNorm = 0.2409, lr_0 = 1.3537e-04
Loss = 1.1152e-03, PNorm = 172.0733, GNorm = 0.1004, lr_0 = 1.3528e-04
Loss = 1.5774e-03, PNorm = 172.0750, GNorm = 0.2394, lr_0 = 1.3518e-04
Loss = 1.9318e-03, PNorm = 172.0759, GNorm = 0.1060, lr_0 = 1.3509e-04
Loss = 1.4200e-03, PNorm = 172.0770, GNorm = 0.1345, lr_0 = 1.3500e-04
Loss = 1.5888e-03, PNorm = 172.0803, GNorm = 0.0786, lr_0 = 1.3491e-04
Loss = 1.4909e-03, PNorm = 172.0823, GNorm = 0.0728, lr_0 = 1.3481e-04
Loss = 3.5720e-03, PNorm = 172.0841, GNorm = 0.0542, lr_0 = 1.3472e-04
Loss = 1.5382e-03, PNorm = 172.0868, GNorm = 0.0791, lr_0 = 1.3463e-04
Loss = 1.1878e-03, PNorm = 172.0885, GNorm = 0.1858, lr_0 = 1.3454e-04
Loss = 1.0642e-03, PNorm = 172.0903, GNorm = 0.1195, lr_0 = 1.3444e-04
Loss = 1.0698e-03, PNorm = 172.0923, GNorm = 0.0480, lr_0 = 1.3435e-04
Loss = 1.2534e-03, PNorm = 172.0949, GNorm = 0.1101, lr_0 = 1.3426e-04
Loss = 2.1364e-03, PNorm = 172.0959, GNorm = 0.0655, lr_0 = 1.3417e-04
Loss = 1.0415e-03, PNorm = 172.0980, GNorm = 0.1499, lr_0 = 1.3408e-04
Loss = 9.0044e-04, PNorm = 172.0993, GNorm = 0.0704, lr_0 = 1.3398e-04
Loss = 9.7849e-04, PNorm = 172.1014, GNorm = 0.1111, lr_0 = 1.3389e-04
Loss = 8.5819e-04, PNorm = 172.1035, GNorm = 0.1313, lr_0 = 1.3380e-04
Loss = 1.2130e-03, PNorm = 172.1045, GNorm = 0.1064, lr_0 = 1.3371e-04
Loss = 1.4937e-03, PNorm = 172.1056, GNorm = 0.0609, lr_0 = 1.3362e-04
Loss = 1.4312e-03, PNorm = 172.1069, GNorm = 0.1379, lr_0 = 1.3353e-04
Loss = 1.4016e-03, PNorm = 172.1102, GNorm = 0.0892, lr_0 = 1.3343e-04
Loss = 1.1470e-03, PNorm = 172.1118, GNorm = 0.0813, lr_0 = 1.3334e-04
Loss = 8.6378e-04, PNorm = 172.1139, GNorm = 0.0804, lr_0 = 1.3325e-04
Loss = 1.6396e-03, PNorm = 172.1160, GNorm = 0.0385, lr_0 = 1.3316e-04
Loss = 3.5232e-03, PNorm = 172.1169, GNorm = 1.0173, lr_0 = 1.3307e-04
Loss = 1.4898e-03, PNorm = 172.1181, GNorm = 0.0922, lr_0 = 1.3298e-04
Loss = 8.9192e-04, PNorm = 172.1210, GNorm = 0.0543, lr_0 = 1.3289e-04
Loss = 9.1032e-04, PNorm = 172.1235, GNorm = 0.0421, lr_0 = 1.3280e-04
Loss = 9.0152e-04, PNorm = 172.1259, GNorm = 0.1248, lr_0 = 1.3270e-04
Loss = 1.1738e-03, PNorm = 172.1273, GNorm = 0.1511, lr_0 = 1.3261e-04
Loss = 3.9083e-03, PNorm = 172.1287, GNorm = 0.0667, lr_0 = 1.3252e-04
Loss = 3.5460e-03, PNorm = 172.1307, GNorm = 0.0758, lr_0 = 1.3243e-04
Loss = 8.5308e-04, PNorm = 172.1324, GNorm = 0.0359, lr_0 = 1.3234e-04
Loss = 1.4392e-03, PNorm = 172.1348, GNorm = 0.0821, lr_0 = 1.3225e-04
Loss = 2.8935e-03, PNorm = 172.1362, GNorm = 0.0913, lr_0 = 1.3216e-04
Loss = 1.3878e-03, PNorm = 172.1380, GNorm = 0.0712, lr_0 = 1.3207e-04
Loss = 2.3195e-03, PNorm = 172.1398, GNorm = 0.0464, lr_0 = 1.3198e-04
Loss = 1.6878e-03, PNorm = 172.1414, GNorm = 0.0497, lr_0 = 1.3189e-04
Loss = 7.9517e-04, PNorm = 172.1433, GNorm = 0.0859, lr_0 = 1.3180e-04
Loss = 2.5137e-03, PNorm = 172.1462, GNorm = 0.0999, lr_0 = 1.3171e-04
Loss = 1.6373e-03, PNorm = 172.1485, GNorm = 0.2574, lr_0 = 1.3162e-04
Loss = 4.5504e-03, PNorm = 172.1482, GNorm = 0.1209, lr_0 = 1.3153e-04
Loss = 1.0544e-03, PNorm = 172.1478, GNorm = 0.0965, lr_0 = 1.3144e-04
Loss = 2.8637e-03, PNorm = 172.1483, GNorm = 0.1859, lr_0 = 1.3135e-04
Loss = 8.0194e-04, PNorm = 172.1498, GNorm = 0.0465, lr_0 = 1.3126e-04
Loss = 8.4222e-04, PNorm = 172.1522, GNorm = 0.0338, lr_0 = 1.3117e-04
Loss = 1.7093e-03, PNorm = 172.1542, GNorm = 0.1307, lr_0 = 1.3108e-04
Loss = 1.0892e-03, PNorm = 172.1554, GNorm = 0.0861, lr_0 = 1.3099e-04
Loss = 1.7739e-03, PNorm = 172.1587, GNorm = 0.1577, lr_0 = 1.3090e-04
Loss = 2.5933e-03, PNorm = 172.1605, GNorm = 0.1566, lr_0 = 1.3081e-04
Loss = 2.2455e-03, PNorm = 172.1610, GNorm = 0.1843, lr_0 = 1.3072e-04
Loss = 8.0157e-04, PNorm = 172.1629, GNorm = 0.0923, lr_0 = 1.3063e-04
Loss = 1.2372e-03, PNorm = 172.1655, GNorm = 0.1045, lr_0 = 1.3054e-04
Loss = 1.6042e-03, PNorm = 172.1686, GNorm = 0.0594, lr_0 = 1.3045e-04
Loss = 1.3367e-03, PNorm = 172.1715, GNorm = 0.0891, lr_0 = 1.3036e-04
Loss = 3.1664e-03, PNorm = 172.1734, GNorm = 0.3114, lr_0 = 1.3027e-04
Loss = 8.1950e-04, PNorm = 172.1752, GNorm = 0.1034, lr_0 = 1.3018e-04
Loss = 1.1339e-03, PNorm = 172.1775, GNorm = 0.0322, lr_0 = 1.3009e-04
Loss = 1.4464e-03, PNorm = 172.1801, GNorm = 0.1107, lr_0 = 1.3000e-04
Loss = 9.6332e-04, PNorm = 172.1828, GNorm = 0.0742, lr_0 = 1.2992e-04
Loss = 1.3927e-03, PNorm = 172.1847, GNorm = 0.0383, lr_0 = 1.2983e-04
Loss = 2.0006e-03, PNorm = 172.1886, GNorm = 0.1405, lr_0 = 1.2974e-04
Loss = 2.6350e-03, PNorm = 172.1909, GNorm = 0.0570, lr_0 = 1.2965e-04
Loss = 1.8503e-03, PNorm = 172.1916, GNorm = 0.0728, lr_0 = 1.2956e-04
Loss = 1.2009e-03, PNorm = 172.1933, GNorm = 0.0942, lr_0 = 1.2947e-04
Loss = 1.9503e-03, PNorm = 172.1976, GNorm = 0.0263, lr_0 = 1.2938e-04
Loss = 2.2508e-03, PNorm = 172.1999, GNorm = 0.0571, lr_0 = 1.2929e-04
Loss = 2.2066e-03, PNorm = 172.2024, GNorm = 0.1731, lr_0 = 1.2921e-04
Loss = 1.3983e-03, PNorm = 172.2039, GNorm = 0.1158, lr_0 = 1.2912e-04
Loss = 9.7871e-04, PNorm = 172.2066, GNorm = 0.0976, lr_0 = 1.2903e-04
Loss = 1.8410e-03, PNorm = 172.2078, GNorm = 0.0478, lr_0 = 1.2894e-04
Loss = 2.0157e-03, PNorm = 172.2097, GNorm = 0.0901, lr_0 = 1.2885e-04
Loss = 1.2373e-03, PNorm = 172.2119, GNorm = 0.0640, lr_0 = 1.2876e-04
Loss = 1.2308e-03, PNorm = 172.2158, GNorm = 0.0556, lr_0 = 1.2867e-04
Loss = 7.0327e-04, PNorm = 172.2183, GNorm = 0.0400, lr_0 = 1.2859e-04
Loss = 1.4810e-03, PNorm = 172.2202, GNorm = 0.0715, lr_0 = 1.2850e-04
Loss = 1.7432e-03, PNorm = 172.2210, GNorm = 0.0847, lr_0 = 1.2841e-04
Loss = 1.0521e-03, PNorm = 172.2208, GNorm = 0.0767, lr_0 = 1.2832e-04
Loss = 8.2479e-04, PNorm = 172.2220, GNorm = 0.1070, lr_0 = 1.2823e-04
Loss = 1.5709e-03, PNorm = 172.2244, GNorm = 0.0940, lr_0 = 1.2815e-04
Loss = 1.3375e-03, PNorm = 172.2271, GNorm = 0.1468, lr_0 = 1.2806e-04
Loss = 1.9226e-03, PNorm = 172.2276, GNorm = 0.1122, lr_0 = 1.2797e-04
Validation mae = 0.278117
Epoch 27
Loss = 8.3518e-04, PNorm = 172.2288, GNorm = 0.0400, lr_0 = 1.2788e-04
Loss = 7.5555e-04, PNorm = 172.2293, GNorm = 0.0741, lr_0 = 1.2780e-04
Loss = 7.6567e-04, PNorm = 172.2303, GNorm = 0.0987, lr_0 = 1.2771e-04
Loss = 1.4151e-03, PNorm = 172.2319, GNorm = 0.0896, lr_0 = 1.2762e-04
Loss = 1.7903e-03, PNorm = 172.2326, GNorm = 0.1178, lr_0 = 1.2753e-04
Loss = 8.7222e-04, PNorm = 172.2346, GNorm = 0.1326, lr_0 = 1.2745e-04
Loss = 1.2756e-03, PNorm = 172.2367, GNorm = 0.0674, lr_0 = 1.2736e-04
Loss = 8.7521e-04, PNorm = 172.2388, GNorm = 0.0873, lr_0 = 1.2727e-04
Loss = 1.2418e-03, PNorm = 172.2404, GNorm = 0.0839, lr_0 = 1.2718e-04
Loss = 1.7098e-03, PNorm = 172.2423, GNorm = 0.0735, lr_0 = 1.2710e-04
Loss = 1.2588e-03, PNorm = 172.2449, GNorm = 0.1648, lr_0 = 1.2701e-04
Loss = 6.6518e-04, PNorm = 172.2470, GNorm = 0.1082, lr_0 = 1.2692e-04
Loss = 1.9044e-03, PNorm = 172.2473, GNorm = 0.0611, lr_0 = 1.2684e-04
Loss = 2.0018e-03, PNorm = 172.2472, GNorm = 0.0741, lr_0 = 1.2675e-04
Loss = 1.3442e-03, PNorm = 172.2478, GNorm = 0.1337, lr_0 = 1.2666e-04
Loss = 6.3400e-04, PNorm = 172.2494, GNorm = 0.2066, lr_0 = 1.2658e-04
Loss = 1.7921e-03, PNorm = 172.2503, GNorm = 0.0568, lr_0 = 1.2649e-04
Loss = 9.0092e-04, PNorm = 172.2513, GNorm = 0.0689, lr_0 = 1.2640e-04
Loss = 2.7394e-03, PNorm = 172.2517, GNorm = 0.0447, lr_0 = 1.2632e-04
Loss = 1.8766e-03, PNorm = 172.2545, GNorm = 0.1434, lr_0 = 1.2623e-04
Loss = 1.1715e-03, PNorm = 172.2558, GNorm = 0.0698, lr_0 = 1.2614e-04
Loss = 1.2170e-03, PNorm = 172.2561, GNorm = 0.1295, lr_0 = 1.2606e-04
Loss = 7.9162e-04, PNorm = 172.2554, GNorm = 0.2032, lr_0 = 1.2597e-04
Loss = 1.5508e-03, PNorm = 172.2563, GNorm = 0.0860, lr_0 = 1.2588e-04
Loss = 1.5592e-03, PNorm = 172.2562, GNorm = 0.1142, lr_0 = 1.2580e-04
Loss = 2.0692e-03, PNorm = 172.2586, GNorm = 0.2634, lr_0 = 1.2571e-04
Loss = 8.7452e-04, PNorm = 172.2594, GNorm = 0.0591, lr_0 = 1.2563e-04
Loss = 4.5501e-03, PNorm = 172.2609, GNorm = 0.0916, lr_0 = 1.2554e-04
Loss = 1.4084e-03, PNorm = 172.2631, GNorm = 0.0990, lr_0 = 1.2545e-04
Loss = 6.9948e-04, PNorm = 172.2656, GNorm = 0.1894, lr_0 = 1.2537e-04
Loss = 1.5992e-03, PNorm = 172.2685, GNorm = 0.1420, lr_0 = 1.2528e-04
Loss = 1.7438e-03, PNorm = 172.2693, GNorm = 0.0604, lr_0 = 1.2520e-04
Loss = 5.9279e-04, PNorm = 172.2706, GNorm = 0.1479, lr_0 = 1.2511e-04
Loss = 7.3064e-04, PNorm = 172.2722, GNorm = 0.1471, lr_0 = 1.2502e-04
Loss = 1.1479e-03, PNorm = 172.2738, GNorm = 0.0582, lr_0 = 1.2494e-04
Loss = 1.0820e-03, PNorm = 172.2747, GNorm = 0.1049, lr_0 = 1.2485e-04
Loss = 8.4733e-04, PNorm = 172.2769, GNorm = 0.0413, lr_0 = 1.2477e-04
Loss = 1.6407e-03, PNorm = 172.2788, GNorm = 0.0809, lr_0 = 1.2468e-04
Loss = 1.7954e-03, PNorm = 172.2795, GNorm = 0.0947, lr_0 = 1.2460e-04
Loss = 7.4206e-04, PNorm = 172.2812, GNorm = 0.0192, lr_0 = 1.2451e-04
Loss = 1.5560e-03, PNorm = 172.2840, GNorm = 0.1326, lr_0 = 1.2443e-04
Loss = 1.0869e-03, PNorm = 172.2838, GNorm = 0.1095, lr_0 = 1.2434e-04
Loss = 6.9707e-04, PNorm = 172.2839, GNorm = 0.0651, lr_0 = 1.2426e-04
Loss = 1.1393e-03, PNorm = 172.2854, GNorm = 0.0244, lr_0 = 1.2417e-04
Loss = 6.9349e-04, PNorm = 172.2878, GNorm = 0.0414, lr_0 = 1.2409e-04
Loss = 6.7905e-04, PNorm = 172.2891, GNorm = 0.0819, lr_0 = 1.2400e-04
Loss = 7.9302e-04, PNorm = 172.2900, GNorm = 0.1705, lr_0 = 1.2392e-04
Loss = 8.5532e-04, PNorm = 172.2913, GNorm = 0.1110, lr_0 = 1.2383e-04
Loss = 1.0693e-03, PNorm = 172.2917, GNorm = 0.0474, lr_0 = 1.2375e-04
Loss = 9.7660e-04, PNorm = 172.2926, GNorm = 0.1065, lr_0 = 1.2366e-04
Loss = 2.0804e-03, PNorm = 172.2940, GNorm = 0.1339, lr_0 = 1.2358e-04
Loss = 1.3941e-03, PNorm = 172.2950, GNorm = 0.0419, lr_0 = 1.2349e-04
Loss = 7.2628e-04, PNorm = 172.2968, GNorm = 0.0597, lr_0 = 1.2341e-04
Loss = 1.2930e-03, PNorm = 172.2989, GNorm = 0.0934, lr_0 = 1.2332e-04
Loss = 6.1190e-04, PNorm = 172.3008, GNorm = 0.1685, lr_0 = 1.2324e-04
Loss = 9.1417e-04, PNorm = 172.3033, GNorm = 0.0522, lr_0 = 1.2315e-04
Loss = 1.7963e-03, PNorm = 172.3049, GNorm = 0.2936, lr_0 = 1.2307e-04
Loss = 1.2384e-03, PNorm = 172.3049, GNorm = 0.1230, lr_0 = 1.2298e-04
Loss = 1.5576e-03, PNorm = 172.3060, GNorm = 0.1309, lr_0 = 1.2290e-04
Loss = 2.0586e-03, PNorm = 172.3077, GNorm = 0.0446, lr_0 = 1.2282e-04
Loss = 6.3847e-04, PNorm = 172.3094, GNorm = 0.0928, lr_0 = 1.2273e-04
Loss = 1.1218e-03, PNorm = 172.3117, GNorm = 0.2155, lr_0 = 1.2265e-04
Loss = 2.7565e-03, PNorm = 172.3139, GNorm = 0.1906, lr_0 = 1.2256e-04
Loss = 6.8098e-04, PNorm = 172.3164, GNorm = 0.0716, lr_0 = 1.2248e-04
Loss = 7.0736e-04, PNorm = 172.3185, GNorm = 0.1233, lr_0 = 1.2240e-04
Loss = 1.7866e-03, PNorm = 172.3197, GNorm = 0.1203, lr_0 = 1.2231e-04
Loss = 8.3884e-04, PNorm = 172.3214, GNorm = 0.0946, lr_0 = 1.2223e-04
Loss = 3.7897e-03, PNorm = 172.3237, GNorm = 0.0489, lr_0 = 1.2214e-04
Loss = 1.3807e-03, PNorm = 172.3264, GNorm = 0.0815, lr_0 = 1.2206e-04
Loss = 7.7334e-04, PNorm = 172.3284, GNorm = 0.0966, lr_0 = 1.2198e-04
Loss = 2.4199e-03, PNorm = 172.3305, GNorm = 0.1024, lr_0 = 1.2189e-04
Loss = 3.2424e-03, PNorm = 172.3324, GNorm = 0.1596, lr_0 = 1.2181e-04
Loss = 1.5259e-03, PNorm = 172.3335, GNorm = 0.1110, lr_0 = 1.2173e-04
Loss = 9.4577e-04, PNorm = 172.3348, GNorm = 0.0653, lr_0 = 1.2164e-04
Loss = 2.5790e-03, PNorm = 172.3359, GNorm = 0.0677, lr_0 = 1.2156e-04
Loss = 1.5018e-03, PNorm = 172.3378, GNorm = 0.1940, lr_0 = 1.2148e-04
Loss = 7.3170e-04, PNorm = 172.3399, GNorm = 0.0631, lr_0 = 1.2139e-04
Loss = 2.6580e-03, PNorm = 172.3410, GNorm = 0.0986, lr_0 = 1.2131e-04
Loss = 1.3045e-03, PNorm = 172.3435, GNorm = 0.0358, lr_0 = 1.2123e-04
Loss = 7.0136e-04, PNorm = 172.3453, GNorm = 0.0433, lr_0 = 1.2114e-04
Loss = 3.8485e-03, PNorm = 172.3481, GNorm = 0.1120, lr_0 = 1.2106e-04
Loss = 8.0872e-04, PNorm = 172.3493, GNorm = 0.1032, lr_0 = 1.2098e-04
Loss = 1.3541e-03, PNorm = 172.3516, GNorm = 0.0375, lr_0 = 1.2090e-04
Loss = 1.0395e-03, PNorm = 172.3542, GNorm = 0.0976, lr_0 = 1.2081e-04
Loss = 7.4748e-04, PNorm = 172.3560, GNorm = 0.1176, lr_0 = 1.2073e-04
Loss = 2.6229e-03, PNorm = 172.3556, GNorm = 0.1138, lr_0 = 1.2065e-04
Loss = 1.0405e-03, PNorm = 172.3569, GNorm = 0.1881, lr_0 = 1.2056e-04
Loss = 1.2356e-03, PNorm = 172.3586, GNorm = 0.0406, lr_0 = 1.2048e-04
Loss = 1.1077e-03, PNorm = 172.3586, GNorm = 0.0485, lr_0 = 1.2040e-04
Loss = 1.1464e-03, PNorm = 172.3595, GNorm = 0.0559, lr_0 = 1.2032e-04
Loss = 1.0255e-03, PNorm = 172.3618, GNorm = 0.0799, lr_0 = 1.2023e-04
Loss = 1.5045e-03, PNorm = 172.3646, GNorm = 0.1603, lr_0 = 1.2015e-04
Loss = 2.5849e-03, PNorm = 172.3643, GNorm = 0.0454, lr_0 = 1.2007e-04
Loss = 1.5862e-03, PNorm = 172.3655, GNorm = 0.0998, lr_0 = 1.1999e-04
Loss = 1.3994e-03, PNorm = 172.3663, GNorm = 0.1589, lr_0 = 1.1991e-04
Loss = 9.5734e-04, PNorm = 172.3671, GNorm = 0.0599, lr_0 = 1.1982e-04
Loss = 3.2923e-03, PNorm = 172.3686, GNorm = 0.1093, lr_0 = 1.1974e-04
Loss = 8.5547e-04, PNorm = 172.3702, GNorm = 0.0780, lr_0 = 1.1966e-04
Loss = 9.5765e-04, PNorm = 172.3712, GNorm = 0.0460, lr_0 = 1.1958e-04
Loss = 2.9930e-03, PNorm = 172.3715, GNorm = 0.0560, lr_0 = 1.1950e-04
Loss = 1.0379e-03, PNorm = 172.3714, GNorm = 0.1481, lr_0 = 1.1941e-04
Loss = 8.3876e-04, PNorm = 172.3735, GNorm = 0.0817, lr_0 = 1.1933e-04
Loss = 1.0675e-03, PNorm = 172.3757, GNorm = 0.1219, lr_0 = 1.1925e-04
Loss = 1.0981e-03, PNorm = 172.3797, GNorm = 0.2100, lr_0 = 1.1917e-04
Loss = 1.0762e-03, PNorm = 172.3830, GNorm = 0.1436, lr_0 = 1.1909e-04
Loss = 1.0969e-03, PNorm = 172.3868, GNorm = 0.0491, lr_0 = 1.1901e-04
Loss = 1.4745e-03, PNorm = 172.3898, GNorm = 0.0481, lr_0 = 1.1892e-04
Loss = 2.5549e-03, PNorm = 172.3923, GNorm = 0.0518, lr_0 = 1.1884e-04
Loss = 1.5803e-03, PNorm = 172.3945, GNorm = 0.1312, lr_0 = 1.1876e-04
Loss = 6.7415e-04, PNorm = 172.3965, GNorm = 0.0801, lr_0 = 1.1868e-04
Loss = 1.8905e-03, PNorm = 172.3974, GNorm = 0.2475, lr_0 = 1.1860e-04
Loss = 2.8425e-03, PNorm = 172.3993, GNorm = 0.3061, lr_0 = 1.1852e-04
Loss = 1.0400e-03, PNorm = 172.3999, GNorm = 0.0773, lr_0 = 1.1844e-04
Loss = 1.3063e-03, PNorm = 172.4001, GNorm = 0.0559, lr_0 = 1.1835e-04
Loss = 9.9829e-04, PNorm = 172.4013, GNorm = 0.0722, lr_0 = 1.1827e-04
Loss = 2.9636e-03, PNorm = 172.4034, GNorm = 0.2062, lr_0 = 1.1819e-04
Loss = 1.3754e-03, PNorm = 172.4061, GNorm = 0.1295, lr_0 = 1.1811e-04
Loss = 1.1171e-03, PNorm = 172.4091, GNorm = 0.0303, lr_0 = 1.1803e-04
Loss = 6.3932e-04, PNorm = 172.4105, GNorm = 0.0786, lr_0 = 1.1795e-04
Loss = 1.5362e-03, PNorm = 172.4115, GNorm = 0.0476, lr_0 = 1.1787e-04
Validation mae = 0.277700
Epoch 28
Loss = 1.6850e-03, PNorm = 172.4114, GNorm = 0.0869, lr_0 = 1.1779e-04
Loss = 1.2268e-03, PNorm = 172.4107, GNorm = 0.0540, lr_0 = 1.1771e-04
Loss = 1.0311e-03, PNorm = 172.4106, GNorm = 0.0695, lr_0 = 1.1763e-04
Loss = 1.7037e-03, PNorm = 172.4109, GNorm = 0.0441, lr_0 = 1.1755e-04
Loss = 1.2504e-03, PNorm = 172.4116, GNorm = 0.0737, lr_0 = 1.1747e-04
Loss = 1.0285e-03, PNorm = 172.4137, GNorm = 0.1332, lr_0 = 1.1739e-04
Loss = 1.9722e-03, PNorm = 172.4160, GNorm = 0.0291, lr_0 = 1.1730e-04
Loss = 1.0044e-03, PNorm = 172.4178, GNorm = 0.0739, lr_0 = 1.1722e-04
Loss = 6.9662e-04, PNorm = 172.4185, GNorm = 0.1904, lr_0 = 1.1714e-04
Loss = 1.1266e-03, PNorm = 172.4195, GNorm = 0.3534, lr_0 = 1.1706e-04
Loss = 1.1751e-03, PNorm = 172.4191, GNorm = 0.1273, lr_0 = 1.1698e-04
Loss = 5.5075e-04, PNorm = 172.4194, GNorm = 0.1164, lr_0 = 1.1690e-04
Loss = 9.7069e-04, PNorm = 172.4202, GNorm = 0.0893, lr_0 = 1.1682e-04
Loss = 7.9222e-04, PNorm = 172.4207, GNorm = 0.0751, lr_0 = 1.1674e-04
Loss = 2.1584e-03, PNorm = 172.4227, GNorm = 0.1488, lr_0 = 1.1666e-04
Loss = 1.4392e-03, PNorm = 172.4243, GNorm = 0.0597, lr_0 = 1.1658e-04
Loss = 9.5639e-04, PNorm = 172.4245, GNorm = 0.0389, lr_0 = 1.1650e-04
Loss = 8.0368e-04, PNorm = 172.4261, GNorm = 0.0342, lr_0 = 1.1642e-04
Loss = 1.7421e-03, PNorm = 172.4275, GNorm = 0.0784, lr_0 = 1.1634e-04
Loss = 5.8481e-04, PNorm = 172.4305, GNorm = 0.0710, lr_0 = 1.1626e-04
Loss = 6.1546e-04, PNorm = 172.4330, GNorm = 0.0657, lr_0 = 1.1618e-04
Loss = 8.0644e-04, PNorm = 172.4337, GNorm = 0.3087, lr_0 = 1.1611e-04
Loss = 1.1796e-03, PNorm = 172.4338, GNorm = 0.1034, lr_0 = 1.1603e-04
Loss = 7.9940e-04, PNorm = 172.4343, GNorm = 0.0496, lr_0 = 1.1595e-04
Loss = 1.3182e-03, PNorm = 172.4343, GNorm = 0.0266, lr_0 = 1.1587e-04
Loss = 6.9560e-04, PNorm = 172.4345, GNorm = 0.1026, lr_0 = 1.1579e-04
Loss = 7.2863e-04, PNorm = 172.4356, GNorm = 0.0582, lr_0 = 1.1571e-04
Loss = 1.2766e-03, PNorm = 172.4385, GNorm = 0.0565, lr_0 = 1.1563e-04
Loss = 1.5121e-03, PNorm = 172.4408, GNorm = 0.0443, lr_0 = 1.1555e-04
Loss = 6.1988e-04, PNorm = 172.4421, GNorm = 0.1400, lr_0 = 1.1547e-04
Loss = 1.5547e-03, PNorm = 172.4447, GNorm = 0.0949, lr_0 = 1.1539e-04
Loss = 1.4306e-03, PNorm = 172.4463, GNorm = 0.2843, lr_0 = 1.1531e-04
Loss = 6.0239e-04, PNorm = 172.4478, GNorm = 0.0962, lr_0 = 1.1523e-04
Loss = 9.3843e-04, PNorm = 172.4487, GNorm = 0.1070, lr_0 = 1.1515e-04
Loss = 1.1387e-03, PNorm = 172.4506, GNorm = 0.0800, lr_0 = 1.1508e-04
Loss = 1.3883e-03, PNorm = 172.4515, GNorm = 0.0646, lr_0 = 1.1500e-04
Loss = 2.2856e-03, PNorm = 172.4531, GNorm = 0.0685, lr_0 = 1.1492e-04
Loss = 1.4896e-03, PNorm = 172.4544, GNorm = 0.1008, lr_0 = 1.1484e-04
Loss = 8.1677e-04, PNorm = 172.4545, GNorm = 0.0328, lr_0 = 1.1476e-04
Loss = 1.3662e-03, PNorm = 172.4549, GNorm = 0.2067, lr_0 = 1.1468e-04
Loss = 6.5269e-04, PNorm = 172.4558, GNorm = 0.1071, lr_0 = 1.1460e-04
Loss = 7.6006e-04, PNorm = 172.4579, GNorm = 0.0873, lr_0 = 1.1452e-04
Loss = 1.2288e-03, PNorm = 172.4598, GNorm = 0.0699, lr_0 = 1.1445e-04
Loss = 6.6106e-04, PNorm = 172.4617, GNorm = 0.0248, lr_0 = 1.1437e-04
Loss = 1.6438e-03, PNorm = 172.4640, GNorm = 0.1249, lr_0 = 1.1429e-04
Loss = 1.9780e-03, PNorm = 172.4659, GNorm = 0.0742, lr_0 = 1.1421e-04
Loss = 8.0316e-04, PNorm = 172.4673, GNorm = 0.0793, lr_0 = 1.1413e-04
Loss = 1.6823e-03, PNorm = 172.4689, GNorm = 0.0657, lr_0 = 1.1405e-04
Loss = 7.5661e-04, PNorm = 172.4696, GNorm = 0.0554, lr_0 = 1.1398e-04
Loss = 6.6972e-04, PNorm = 172.4706, GNorm = 0.1591, lr_0 = 1.1390e-04
Loss = 1.0563e-03, PNorm = 172.4714, GNorm = 0.1140, lr_0 = 1.1382e-04
Loss = 1.9218e-03, PNorm = 172.4732, GNorm = 0.1471, lr_0 = 1.1374e-04
Loss = 1.7715e-03, PNorm = 172.4742, GNorm = 0.1503, lr_0 = 1.1366e-04
Loss = 1.6192e-03, PNorm = 172.4748, GNorm = 0.0372, lr_0 = 1.1359e-04
Loss = 5.5577e-04, PNorm = 172.4761, GNorm = 0.0414, lr_0 = 1.1351e-04
Loss = 1.1115e-03, PNorm = 172.4770, GNorm = 0.0255, lr_0 = 1.1343e-04
Loss = 1.1969e-03, PNorm = 172.4782, GNorm = 0.0915, lr_0 = 1.1335e-04
Loss = 1.7601e-03, PNorm = 172.4794, GNorm = 0.0585, lr_0 = 1.1328e-04
Loss = 6.9632e-04, PNorm = 172.4798, GNorm = 0.0685, lr_0 = 1.1320e-04
Loss = 1.3135e-03, PNorm = 172.4804, GNorm = 0.1133, lr_0 = 1.1312e-04
Loss = 6.3887e-04, PNorm = 172.4803, GNorm = 0.0956, lr_0 = 1.1304e-04
Loss = 1.7400e-03, PNorm = 172.4807, GNorm = 0.2043, lr_0 = 1.1297e-04
Loss = 2.0112e-03, PNorm = 172.4813, GNorm = 0.0414, lr_0 = 1.1289e-04
Loss = 5.6748e-04, PNorm = 172.4812, GNorm = 0.0450, lr_0 = 1.1281e-04
Loss = 7.7994e-04, PNorm = 172.4822, GNorm = 0.1096, lr_0 = 1.1273e-04
Loss = 1.1316e-03, PNorm = 172.4836, GNorm = 0.0347, lr_0 = 1.1266e-04
Loss = 5.5861e-04, PNorm = 172.4853, GNorm = 0.0889, lr_0 = 1.1258e-04
Loss = 7.8442e-04, PNorm = 172.4862, GNorm = 0.0821, lr_0 = 1.1250e-04
Loss = 5.6287e-04, PNorm = 172.4878, GNorm = 0.1120, lr_0 = 1.1243e-04
Loss = 5.8470e-04, PNorm = 172.4891, GNorm = 0.1542, lr_0 = 1.1235e-04
Loss = 1.3052e-03, PNorm = 172.4909, GNorm = 0.0898, lr_0 = 1.1227e-04
Loss = 9.3663e-04, PNorm = 172.4907, GNorm = 0.0543, lr_0 = 1.1219e-04
Loss = 1.3136e-03, PNorm = 172.4916, GNorm = 0.0783, lr_0 = 1.1212e-04
Loss = 1.7252e-03, PNorm = 172.4930, GNorm = 0.0493, lr_0 = 1.1204e-04
Loss = 8.7284e-04, PNorm = 172.4952, GNorm = 0.0400, lr_0 = 1.1196e-04
Loss = 7.6678e-04, PNorm = 172.4963, GNorm = 0.0865, lr_0 = 1.1189e-04
Loss = 4.9987e-04, PNorm = 172.4975, GNorm = 0.0321, lr_0 = 1.1181e-04
Loss = 1.7133e-03, PNorm = 172.4973, GNorm = 0.0700, lr_0 = 1.1173e-04
Loss = 1.5691e-03, PNorm = 172.4994, GNorm = 0.1756, lr_0 = 1.1166e-04
Loss = 8.7357e-04, PNorm = 172.5002, GNorm = 0.0868, lr_0 = 1.1158e-04
Loss = 8.4983e-04, PNorm = 172.5012, GNorm = 0.1176, lr_0 = 1.1150e-04
Loss = 1.0154e-03, PNorm = 172.5031, GNorm = 0.0663, lr_0 = 1.1143e-04
Loss = 6.7533e-04, PNorm = 172.5048, GNorm = 0.0869, lr_0 = 1.1135e-04
Loss = 1.7935e-03, PNorm = 172.5054, GNorm = 0.0561, lr_0 = 1.1128e-04
Loss = 5.5350e-03, PNorm = 172.5067, GNorm = 0.1022, lr_0 = 1.1120e-04
Loss = 2.5918e-03, PNorm = 172.5095, GNorm = 0.0685, lr_0 = 1.1112e-04
Loss = 3.2324e-03, PNorm = 172.5119, GNorm = 0.1932, lr_0 = 1.1105e-04
Loss = 1.8125e-03, PNorm = 172.5146, GNorm = 0.0954, lr_0 = 1.1097e-04
Loss = 1.2999e-03, PNorm = 172.5169, GNorm = 0.0422, lr_0 = 1.1089e-04
Loss = 6.6940e-04, PNorm = 172.5189, GNorm = 0.0838, lr_0 = 1.1082e-04
Loss = 1.0394e-03, PNorm = 172.5197, GNorm = 0.0753, lr_0 = 1.1074e-04
Loss = 3.1018e-03, PNorm = 172.5188, GNorm = 0.1954, lr_0 = 1.1067e-04
Loss = 8.5197e-04, PNorm = 172.5186, GNorm = 0.1273, lr_0 = 1.1059e-04
Loss = 1.2297e-03, PNorm = 172.5196, GNorm = 0.1030, lr_0 = 1.1052e-04
Loss = 8.7818e-04, PNorm = 172.5220, GNorm = 0.0752, lr_0 = 1.1044e-04
Loss = 1.2845e-03, PNorm = 172.5235, GNorm = 0.0952, lr_0 = 1.1036e-04
Loss = 8.0219e-04, PNorm = 172.5263, GNorm = 0.0505, lr_0 = 1.1029e-04
Loss = 7.5431e-04, PNorm = 172.5274, GNorm = 0.0603, lr_0 = 1.1021e-04
Loss = 1.1228e-03, PNorm = 172.5293, GNorm = 0.0366, lr_0 = 1.1014e-04
Loss = 1.4356e-03, PNorm = 172.5310, GNorm = 0.0583, lr_0 = 1.1006e-04
Loss = 3.7937e-03, PNorm = 172.5349, GNorm = 0.1049, lr_0 = 1.0999e-04
Loss = 1.2543e-03, PNorm = 172.5372, GNorm = 0.1464, lr_0 = 1.0991e-04
Loss = 7.4222e-04, PNorm = 172.5391, GNorm = 0.0546, lr_0 = 1.0984e-04
Loss = 5.3556e-04, PNorm = 172.5412, GNorm = 0.0983, lr_0 = 1.0976e-04
Loss = 2.0407e-03, PNorm = 172.5436, GNorm = 0.0986, lr_0 = 1.0969e-04
Loss = 2.0583e-03, PNorm = 172.5451, GNorm = 0.1815, lr_0 = 1.0961e-04
Loss = 2.4190e-03, PNorm = 172.5465, GNorm = 0.1556, lr_0 = 1.0954e-04
Loss = 1.1532e-03, PNorm = 172.5470, GNorm = 0.1489, lr_0 = 1.0946e-04
Loss = 2.3866e-03, PNorm = 172.5474, GNorm = 0.0567, lr_0 = 1.0939e-04
Loss = 1.5360e-03, PNorm = 172.5492, GNorm = 0.1058, lr_0 = 1.0931e-04
Loss = 1.8216e-03, PNorm = 172.5509, GNorm = 0.0611, lr_0 = 1.0924e-04
Loss = 1.0604e-03, PNorm = 172.5535, GNorm = 0.0514, lr_0 = 1.0916e-04
Loss = 8.3866e-04, PNorm = 172.5559, GNorm = 0.1590, lr_0 = 1.0909e-04
Loss = 1.8720e-03, PNorm = 172.5582, GNorm = 0.1298, lr_0 = 1.0901e-04
Loss = 6.5479e-04, PNorm = 172.5591, GNorm = 0.0215, lr_0 = 1.0894e-04
Loss = 9.0486e-04, PNorm = 172.5603, GNorm = 0.0441, lr_0 = 1.0886e-04
Loss = 1.6311e-03, PNorm = 172.5618, GNorm = 0.1570, lr_0 = 1.0879e-04
Loss = 1.2101e-03, PNorm = 172.5618, GNorm = 0.1109, lr_0 = 1.0871e-04
Loss = 3.6421e-03, PNorm = 172.5628, GNorm = 0.0558, lr_0 = 1.0864e-04
Loss = 1.5494e-03, PNorm = 172.5649, GNorm = 0.1613, lr_0 = 1.0856e-04
Validation mae = 0.277862
Epoch 29
Loss = 6.4697e-04, PNorm = 172.5663, GNorm = 0.0688, lr_0 = 1.0849e-04
Loss = 9.6378e-04, PNorm = 172.5675, GNorm = 0.1237, lr_0 = 1.0841e-04
Loss = 5.2757e-04, PNorm = 172.5676, GNorm = 0.1212, lr_0 = 1.0834e-04
Loss = 9.3316e-04, PNorm = 172.5672, GNorm = 0.0514, lr_0 = 1.0827e-04
Loss = 4.9529e-04, PNorm = 172.5686, GNorm = 0.0277, lr_0 = 1.0819e-04
Loss = 5.6847e-04, PNorm = 172.5694, GNorm = 0.0231, lr_0 = 1.0812e-04
Loss = 1.3343e-03, PNorm = 172.5713, GNorm = 0.0703, lr_0 = 1.0804e-04
Loss = 1.0412e-03, PNorm = 172.5726, GNorm = 0.1441, lr_0 = 1.0797e-04
Loss = 5.4743e-04, PNorm = 172.5747, GNorm = 0.0882, lr_0 = 1.0790e-04
Loss = 1.0444e-03, PNorm = 172.5755, GNorm = 0.1339, lr_0 = 1.0782e-04
Loss = 5.8067e-04, PNorm = 172.5769, GNorm = 0.0741, lr_0 = 1.0775e-04
Loss = 9.9189e-04, PNorm = 172.5778, GNorm = 0.0715, lr_0 = 1.0767e-04
Loss = 4.9386e-04, PNorm = 172.5794, GNorm = 0.0268, lr_0 = 1.0760e-04
Loss = 5.3298e-04, PNorm = 172.5821, GNorm = 0.0541, lr_0 = 1.0753e-04
Loss = 8.0620e-04, PNorm = 172.5848, GNorm = 0.0572, lr_0 = 1.0745e-04
Loss = 1.0933e-03, PNorm = 172.5865, GNorm = 0.0444, lr_0 = 1.0738e-04
Loss = 1.2366e-03, PNorm = 172.5868, GNorm = 0.0626, lr_0 = 1.0731e-04
Loss = 5.6264e-04, PNorm = 172.5872, GNorm = 0.0301, lr_0 = 1.0723e-04
Loss = 1.6978e-03, PNorm = 172.5875, GNorm = 0.1584, lr_0 = 1.0716e-04
Loss = 8.7871e-04, PNorm = 172.5875, GNorm = 0.0298, lr_0 = 1.0709e-04
Loss = 1.8350e-03, PNorm = 172.5885, GNorm = 0.1053, lr_0 = 1.0701e-04
Loss = 8.9653e-04, PNorm = 172.5906, GNorm = 0.1467, lr_0 = 1.0694e-04
Loss = 1.8326e-03, PNorm = 172.5920, GNorm = 0.1153, lr_0 = 1.0687e-04
Loss = 1.8148e-03, PNorm = 172.5937, GNorm = 0.0627, lr_0 = 1.0679e-04
Loss = 9.3919e-04, PNorm = 172.5955, GNorm = 0.1276, lr_0 = 1.0672e-04
Loss = 1.1227e-03, PNorm = 172.5972, GNorm = 0.1789, lr_0 = 1.0665e-04
Loss = 6.4287e-04, PNorm = 172.5982, GNorm = 0.0357, lr_0 = 1.0657e-04
Loss = 1.2709e-03, PNorm = 172.5994, GNorm = 0.0602, lr_0 = 1.0650e-04
Loss = 6.3451e-04, PNorm = 172.6003, GNorm = 0.0979, lr_0 = 1.0643e-04
Loss = 1.3331e-03, PNorm = 172.6013, GNorm = 0.0705, lr_0 = 1.0635e-04
Loss = 1.1948e-03, PNorm = 172.6024, GNorm = 0.0818, lr_0 = 1.0628e-04
Loss = 1.8778e-03, PNorm = 172.6035, GNorm = 0.0486, lr_0 = 1.0621e-04
Loss = 1.5008e-03, PNorm = 172.6038, GNorm = 0.0296, lr_0 = 1.0614e-04
Loss = 3.1083e-03, PNorm = 172.6045, GNorm = 0.6783, lr_0 = 1.0606e-04
Loss = 9.8852e-04, PNorm = 172.6062, GNorm = 0.0253, lr_0 = 1.0599e-04
Loss = 1.0882e-03, PNorm = 172.6059, GNorm = 0.1952, lr_0 = 1.0592e-04
Loss = 6.5178e-04, PNorm = 172.6058, GNorm = 0.1876, lr_0 = 1.0585e-04
Loss = 7.4539e-04, PNorm = 172.6055, GNorm = 0.1053, lr_0 = 1.0577e-04
Loss = 6.1316e-04, PNorm = 172.6081, GNorm = 0.1148, lr_0 = 1.0570e-04
Loss = 8.3395e-04, PNorm = 172.6090, GNorm = 0.0852, lr_0 = 1.0563e-04
Loss = 5.7453e-04, PNorm = 172.6100, GNorm = 0.0505, lr_0 = 1.0556e-04
Loss = 1.0234e-03, PNorm = 172.6105, GNorm = 0.1031, lr_0 = 1.0548e-04
Loss = 1.7676e-03, PNorm = 172.6116, GNorm = 0.0424, lr_0 = 1.0541e-04
Loss = 9.2007e-04, PNorm = 172.6123, GNorm = 0.0548, lr_0 = 1.0534e-04
Loss = 1.0843e-03, PNorm = 172.6145, GNorm = 0.0670, lr_0 = 1.0527e-04
Loss = 1.7023e-03, PNorm = 172.6165, GNorm = 0.0859, lr_0 = 1.0519e-04
Loss = 2.5425e-03, PNorm = 172.6187, GNorm = 0.1876, lr_0 = 1.0512e-04
Loss = 1.1701e-03, PNorm = 172.6203, GNorm = 0.0706, lr_0 = 1.0505e-04
Loss = 1.2969e-03, PNorm = 172.6204, GNorm = 0.0711, lr_0 = 1.0498e-04
Loss = 7.8622e-04, PNorm = 172.6200, GNorm = 0.0810, lr_0 = 1.0491e-04
Loss = 1.0274e-03, PNorm = 172.6208, GNorm = 0.0514, lr_0 = 1.0483e-04
Loss = 1.2677e-03, PNorm = 172.6224, GNorm = 0.0737, lr_0 = 1.0476e-04
Loss = 2.4885e-03, PNorm = 172.6224, GNorm = 0.0981, lr_0 = 1.0469e-04
Loss = 1.2379e-03, PNorm = 172.6227, GNorm = 0.1826, lr_0 = 1.0462e-04
Loss = 8.6181e-04, PNorm = 172.6216, GNorm = 0.1302, lr_0 = 1.0455e-04
Loss = 1.3062e-03, PNorm = 172.6227, GNorm = 0.0351, lr_0 = 1.0448e-04
Loss = 7.7627e-04, PNorm = 172.6244, GNorm = 0.0407, lr_0 = 1.0440e-04
Loss = 6.7231e-04, PNorm = 172.6251, GNorm = 0.1317, lr_0 = 1.0433e-04
Loss = 6.4194e-04, PNorm = 172.6261, GNorm = 0.0998, lr_0 = 1.0426e-04
Loss = 2.1594e-03, PNorm = 172.6267, GNorm = 0.0350, lr_0 = 1.0419e-04
Loss = 2.2847e-03, PNorm = 172.6265, GNorm = 0.0773, lr_0 = 1.0412e-04
Loss = 1.1328e-03, PNorm = 172.6277, GNorm = 0.0498, lr_0 = 1.0405e-04
Loss = 1.0690e-03, PNorm = 172.6302, GNorm = 0.2538, lr_0 = 1.0398e-04
Loss = 1.8220e-03, PNorm = 172.6322, GNorm = 0.0626, lr_0 = 1.0391e-04
Loss = 1.7861e-03, PNorm = 172.6332, GNorm = 0.0315, lr_0 = 1.0383e-04
Loss = 5.6170e-04, PNorm = 172.6345, GNorm = 0.0964, lr_0 = 1.0376e-04
Loss = 1.0747e-03, PNorm = 172.6356, GNorm = 0.1247, lr_0 = 1.0369e-04
Loss = 7.7238e-04, PNorm = 172.6370, GNorm = 0.0505, lr_0 = 1.0362e-04
Loss = 6.1314e-04, PNorm = 172.6388, GNorm = 0.0953, lr_0 = 1.0355e-04
Loss = 4.6388e-04, PNorm = 172.6402, GNorm = 0.0386, lr_0 = 1.0348e-04
Loss = 1.1990e-03, PNorm = 172.6419, GNorm = 0.0763, lr_0 = 1.0341e-04
Loss = 4.9040e-04, PNorm = 172.6437, GNorm = 0.0228, lr_0 = 1.0334e-04
Loss = 1.3220e-03, PNorm = 172.6444, GNorm = 0.0627, lr_0 = 1.0327e-04
Loss = 1.4996e-03, PNorm = 172.6460, GNorm = 0.0835, lr_0 = 1.0320e-04
Loss = 1.7199e-03, PNorm = 172.6472, GNorm = 0.1380, lr_0 = 1.0312e-04
Loss = 9.8736e-04, PNorm = 172.6487, GNorm = 0.1374, lr_0 = 1.0305e-04
Loss = 7.8558e-04, PNorm = 172.6487, GNorm = 0.1421, lr_0 = 1.0298e-04
Loss = 2.7409e-03, PNorm = 172.6497, GNorm = 0.0582, lr_0 = 1.0291e-04
Loss = 7.4369e-04, PNorm = 172.6504, GNorm = 0.0885, lr_0 = 1.0284e-04
Loss = 8.7957e-04, PNorm = 172.6505, GNorm = 0.0325, lr_0 = 1.0277e-04
Loss = 1.0269e-03, PNorm = 172.6506, GNorm = 0.0941, lr_0 = 1.0270e-04
Loss = 2.7263e-03, PNorm = 172.6513, GNorm = 0.1178, lr_0 = 1.0263e-04
Loss = 1.9281e-03, PNorm = 172.6518, GNorm = 0.1401, lr_0 = 1.0256e-04
Loss = 8.4007e-04, PNorm = 172.6543, GNorm = 0.0710, lr_0 = 1.0249e-04
Loss = 5.9348e-04, PNorm = 172.6558, GNorm = 0.2614, lr_0 = 1.0242e-04
Loss = 1.4090e-03, PNorm = 172.6559, GNorm = 0.0979, lr_0 = 1.0235e-04
Loss = 9.4411e-04, PNorm = 172.6561, GNorm = 0.1503, lr_0 = 1.0228e-04
Loss = 1.3401e-03, PNorm = 172.6577, GNorm = 0.1801, lr_0 = 1.0221e-04
Loss = 1.9824e-03, PNorm = 172.6598, GNorm = 0.3717, lr_0 = 1.0214e-04
Loss = 1.0042e-03, PNorm = 172.6619, GNorm = 0.1021, lr_0 = 1.0207e-04
Loss = 1.1632e-03, PNorm = 172.6632, GNorm = 0.2273, lr_0 = 1.0200e-04
Loss = 1.5696e-03, PNorm = 172.6649, GNorm = 0.2657, lr_0 = 1.0193e-04
Loss = 1.0921e-03, PNorm = 172.6653, GNorm = 0.0487, lr_0 = 1.0186e-04
Loss = 2.3339e-03, PNorm = 172.6662, GNorm = 0.0905, lr_0 = 1.0179e-04
Loss = 1.3304e-03, PNorm = 172.6685, GNorm = 0.0368, lr_0 = 1.0172e-04
Loss = 3.3900e-03, PNorm = 172.6691, GNorm = 0.1365, lr_0 = 1.0165e-04
Loss = 2.5531e-03, PNorm = 172.6702, GNorm = 0.1258, lr_0 = 1.0158e-04
Loss = 2.5840e-03, PNorm = 172.6722, GNorm = 0.1425, lr_0 = 1.0151e-04
Loss = 5.6537e-04, PNorm = 172.6752, GNorm = 0.1676, lr_0 = 1.0144e-04
Loss = 6.9854e-04, PNorm = 172.6770, GNorm = 0.0792, lr_0 = 1.0137e-04
Loss = 5.8559e-04, PNorm = 172.6782, GNorm = 0.0572, lr_0 = 1.0130e-04
Loss = 1.9351e-03, PNorm = 172.6794, GNorm = 0.0613, lr_0 = 1.0123e-04
Loss = 6.8542e-04, PNorm = 172.6814, GNorm = 0.0959, lr_0 = 1.0116e-04
Loss = 1.0757e-03, PNorm = 172.6827, GNorm = 0.0395, lr_0 = 1.0110e-04
Loss = 8.3124e-04, PNorm = 172.6829, GNorm = 0.0965, lr_0 = 1.0103e-04
Loss = 5.2777e-04, PNorm = 172.6831, GNorm = 0.0329, lr_0 = 1.0096e-04
Loss = 1.1191e-03, PNorm = 172.6836, GNorm = 0.0392, lr_0 = 1.0089e-04
Loss = 1.2345e-03, PNorm = 172.6848, GNorm = 0.1432, lr_0 = 1.0082e-04
Loss = 7.6674e-04, PNorm = 172.6858, GNorm = 0.0835, lr_0 = 1.0075e-04
Loss = 8.3227e-04, PNorm = 172.6870, GNorm = 0.0912, lr_0 = 1.0068e-04
Loss = 4.3202e-04, PNorm = 172.6881, GNorm = 0.0326, lr_0 = 1.0061e-04
Loss = 9.6120e-04, PNorm = 172.6893, GNorm = 0.1994, lr_0 = 1.0054e-04
Loss = 2.5381e-03, PNorm = 172.6906, GNorm = 0.2501, lr_0 = 1.0047e-04
Loss = 2.0226e-03, PNorm = 172.6914, GNorm = 0.0479, lr_0 = 1.0041e-04
Loss = 1.0412e-03, PNorm = 172.6923, GNorm = 0.0784, lr_0 = 1.0034e-04
Loss = 1.2895e-03, PNorm = 172.6929, GNorm = 0.0629, lr_0 = 1.0027e-04
Loss = 5.3645e-04, PNorm = 172.6939, GNorm = 0.0771, lr_0 = 1.0020e-04
Loss = 1.6240e-03, PNorm = 172.6941, GNorm = 0.0863, lr_0 = 1.0013e-04
Loss = 5.5037e-04, PNorm = 172.6945, GNorm = 0.1955, lr_0 = 1.0006e-04
Loss = 2.0095e-03, PNorm = 172.6970, GNorm = 0.1107, lr_0 = 1.0000e-04
Validation mae = 0.277436
Model 0 best validation mae = 0.277436 on epoch 29
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.274369
Ensemble test mae = 0.274369
Fold 9
Splitting data with seed 9
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=2200, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=2200, out_features=2200, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=2200, out_features=1, bias=True)
  )
)
Number of parameters = 9,352,201
Moving model to cuda
Epoch 0
Loss = 9.2720e-01, PNorm = 65.7686, GNorm = 1.7288, lr_0 = 1.0413e-04
Loss = 6.5342e-01, PNorm = 65.7837, GNorm = 1.7688, lr_0 = 1.0788e-04
Loss = 5.1398e-01, PNorm = 65.7975, GNorm = 3.3177, lr_0 = 1.1163e-04
Loss = 4.8761e-01, PNorm = 65.8090, GNorm = 2.3715, lr_0 = 1.1537e-04
Loss = 3.9527e-01, PNorm = 65.8196, GNorm = 2.3868, lr_0 = 1.1913e-04
Loss = 4.7530e-01, PNorm = 65.8276, GNorm = 2.5674, lr_0 = 1.2287e-04
Loss = 4.3741e-01, PNorm = 65.8372, GNorm = 2.4016, lr_0 = 1.2663e-04
Loss = 3.9735e-01, PNorm = 65.8474, GNorm = 1.9449, lr_0 = 1.3038e-04
Loss = 3.7589e-01, PNorm = 65.8566, GNorm = 2.0578, lr_0 = 1.3413e-04
Loss = 4.0904e-01, PNorm = 65.8658, GNorm = 2.7383, lr_0 = 1.3788e-04
Loss = 4.2991e-01, PNorm = 65.8757, GNorm = 2.1804, lr_0 = 1.4163e-04
Loss = 3.4749e-01, PNorm = 65.8851, GNorm = 1.8974, lr_0 = 1.4537e-04
Loss = 4.0194e-01, PNorm = 65.8951, GNorm = 2.1544, lr_0 = 1.4913e-04
Loss = 3.5326e-01, PNorm = 65.9075, GNorm = 2.6039, lr_0 = 1.5288e-04
Loss = 3.7742e-01, PNorm = 65.9184, GNorm = 2.2715, lr_0 = 1.5662e-04
Loss = 3.4254e-01, PNorm = 65.9285, GNorm = 1.5233, lr_0 = 1.6038e-04
Loss = 3.2959e-01, PNorm = 65.9401, GNorm = 2.1842, lr_0 = 1.6412e-04
Loss = 3.3724e-01, PNorm = 65.9510, GNorm = 4.4936, lr_0 = 1.6788e-04
Loss = 3.6096e-01, PNorm = 65.9623, GNorm = 2.6879, lr_0 = 1.7163e-04
Loss = 3.2078e-01, PNorm = 65.9756, GNorm = 1.7194, lr_0 = 1.7538e-04
Loss = 3.4641e-01, PNorm = 65.9886, GNorm = 1.8989, lr_0 = 1.7913e-04
Loss = 3.4051e-01, PNorm = 66.0006, GNorm = 2.0269, lr_0 = 1.8288e-04
Loss = 3.6118e-01, PNorm = 66.0147, GNorm = 1.9103, lr_0 = 1.8662e-04
Loss = 3.2796e-01, PNorm = 66.0292, GNorm = 1.9234, lr_0 = 1.9038e-04
Loss = 3.4138e-01, PNorm = 66.0449, GNorm = 1.8255, lr_0 = 1.9413e-04
Loss = 3.1206e-01, PNorm = 66.0590, GNorm = 1.7931, lr_0 = 1.9788e-04
Loss = 3.4915e-01, PNorm = 66.0717, GNorm = 1.8337, lr_0 = 2.0163e-04
Loss = 3.5226e-01, PNorm = 66.0877, GNorm = 1.5793, lr_0 = 2.0537e-04
Loss = 4.4933e-01, PNorm = 66.1072, GNorm = 1.9854, lr_0 = 2.0913e-04
Loss = 3.2387e-01, PNorm = 66.1238, GNorm = 1.7186, lr_0 = 2.1288e-04
Loss = 2.8465e-01, PNorm = 66.1430, GNorm = 1.6665, lr_0 = 2.1663e-04
Loss = 3.5912e-01, PNorm = 66.1589, GNorm = 2.4233, lr_0 = 2.2038e-04
Loss = 3.0309e-01, PNorm = 66.1765, GNorm = 1.5553, lr_0 = 2.2412e-04
Loss = 2.6793e-01, PNorm = 66.1964, GNorm = 1.1681, lr_0 = 2.2787e-04
Loss = 2.8320e-01, PNorm = 66.2104, GNorm = 1.5344, lr_0 = 2.3163e-04
Loss = 2.6516e-01, PNorm = 66.2254, GNorm = 1.2836, lr_0 = 2.3538e-04
Loss = 2.7841e-01, PNorm = 66.2420, GNorm = 1.3758, lr_0 = 2.3913e-04
Loss = 2.9229e-01, PNorm = 66.2600, GNorm = 1.2643, lr_0 = 2.4288e-04
Loss = 2.9567e-01, PNorm = 66.2749, GNorm = 1.3963, lr_0 = 2.4662e-04
Loss = 2.8861e-01, PNorm = 66.2954, GNorm = 1.2792, lr_0 = 2.5038e-04
Loss = 3.1144e-01, PNorm = 66.3151, GNorm = 1.8382, lr_0 = 2.5413e-04
Loss = 2.7287e-01, PNorm = 66.3330, GNorm = 1.3362, lr_0 = 2.5788e-04
Loss = 2.7603e-01, PNorm = 66.3534, GNorm = 1.0652, lr_0 = 2.6163e-04
Loss = 3.2116e-01, PNorm = 66.3757, GNorm = 1.3913, lr_0 = 2.6537e-04
Loss = 3.3628e-01, PNorm = 66.3987, GNorm = 2.4651, lr_0 = 2.6912e-04
Loss = 2.7852e-01, PNorm = 66.4242, GNorm = 1.3601, lr_0 = 2.7288e-04
Loss = 2.6589e-01, PNorm = 66.4483, GNorm = 1.9271, lr_0 = 2.7663e-04
Loss = 2.9974e-01, PNorm = 66.4734, GNorm = 1.2036, lr_0 = 2.8038e-04
Loss = 3.2950e-01, PNorm = 66.4982, GNorm = 1.3169, lr_0 = 2.8413e-04
Loss = 3.0071e-01, PNorm = 66.5242, GNorm = 1.3135, lr_0 = 2.8787e-04
Loss = 2.9815e-01, PNorm = 66.5477, GNorm = 1.6257, lr_0 = 2.9163e-04
Loss = 2.8765e-01, PNorm = 66.5734, GNorm = 1.3025, lr_0 = 2.9538e-04
Loss = 3.1495e-01, PNorm = 66.6017, GNorm = 1.4057, lr_0 = 2.9913e-04
Loss = 3.1772e-01, PNorm = 66.6291, GNorm = 1.2713, lr_0 = 3.0288e-04
Loss = 2.9056e-01, PNorm = 66.6540, GNorm = 1.2449, lr_0 = 3.0662e-04
Loss = 2.9286e-01, PNorm = 66.6806, GNorm = 1.4794, lr_0 = 3.1037e-04
Loss = 2.6976e-01, PNorm = 66.7102, GNorm = 1.2862, lr_0 = 3.1413e-04
Loss = 2.7854e-01, PNorm = 66.7353, GNorm = 1.2857, lr_0 = 3.1788e-04
Loss = 2.6198e-01, PNorm = 66.7613, GNorm = 1.4053, lr_0 = 3.2163e-04
Loss = 2.8097e-01, PNorm = 66.7888, GNorm = 1.9183, lr_0 = 3.2538e-04
Loss = 2.9099e-01, PNorm = 66.8183, GNorm = 1.8564, lr_0 = 3.2912e-04
Loss = 2.6762e-01, PNorm = 66.8459, GNorm = 1.7333, lr_0 = 3.3288e-04
Loss = 2.4602e-01, PNorm = 66.8744, GNorm = 0.9603, lr_0 = 3.3663e-04
Loss = 2.9359e-01, PNorm = 66.9024, GNorm = 1.2209, lr_0 = 3.4038e-04
Loss = 3.0171e-01, PNorm = 66.9359, GNorm = 1.1759, lr_0 = 3.4413e-04
Loss = 2.8987e-01, PNorm = 66.9712, GNorm = 1.5277, lr_0 = 3.4787e-04
Loss = 2.8152e-01, PNorm = 67.0046, GNorm = 1.5277, lr_0 = 3.5162e-04
Loss = 3.2576e-01, PNorm = 67.0394, GNorm = 1.1976, lr_0 = 3.5538e-04
Loss = 2.6669e-01, PNorm = 67.0738, GNorm = 1.2856, lr_0 = 3.5913e-04
Loss = 2.6294e-01, PNorm = 67.1119, GNorm = 1.0980, lr_0 = 3.6288e-04
Loss = 3.0764e-01, PNorm = 67.1411, GNorm = 1.4230, lr_0 = 3.6662e-04
Loss = 2.8211e-01, PNorm = 67.1754, GNorm = 1.1137, lr_0 = 3.7037e-04
Loss = 2.6801e-01, PNorm = 67.2118, GNorm = 1.3141, lr_0 = 3.7413e-04
Loss = 2.8813e-01, PNorm = 67.2463, GNorm = 1.0050, lr_0 = 3.7788e-04
Loss = 2.8867e-01, PNorm = 67.2787, GNorm = 1.1750, lr_0 = 3.8163e-04
Loss = 2.8782e-01, PNorm = 67.3153, GNorm = 0.9969, lr_0 = 3.8537e-04
Loss = 2.9010e-01, PNorm = 67.3549, GNorm = 1.8615, lr_0 = 3.8912e-04
Loss = 2.5120e-01, PNorm = 67.3911, GNorm = 1.7125, lr_0 = 3.9287e-04
Loss = 2.8758e-01, PNorm = 67.4269, GNorm = 1.2818, lr_0 = 3.9663e-04
Loss = 2.3960e-01, PNorm = 67.4642, GNorm = 1.0393, lr_0 = 4.0038e-04
Loss = 2.6194e-01, PNorm = 67.4998, GNorm = 1.1144, lr_0 = 4.0413e-04
Loss = 2.4192e-01, PNorm = 67.5394, GNorm = 1.1731, lr_0 = 4.0787e-04
Loss = 2.7818e-01, PNorm = 67.5742, GNorm = 1.0078, lr_0 = 4.1162e-04
Loss = 2.9594e-01, PNorm = 67.6143, GNorm = 1.0987, lr_0 = 4.1537e-04
Loss = 2.5445e-01, PNorm = 67.6584, GNorm = 1.1240, lr_0 = 4.1913e-04
Loss = 3.2117e-01, PNorm = 67.7041, GNorm = 1.3925, lr_0 = 4.2288e-04
Loss = 2.6671e-01, PNorm = 67.7498, GNorm = 1.4556, lr_0 = 4.2662e-04
Loss = 2.7484e-01, PNorm = 67.7966, GNorm = 1.2733, lr_0 = 4.3037e-04
Loss = 2.6068e-01, PNorm = 67.8403, GNorm = 1.0654, lr_0 = 4.3412e-04
Loss = 2.4900e-01, PNorm = 67.8840, GNorm = 0.8749, lr_0 = 4.3788e-04
Loss = 2.7388e-01, PNorm = 67.9267, GNorm = 1.8995, lr_0 = 4.4163e-04
Loss = 3.2511e-01, PNorm = 67.9732, GNorm = 1.2319, lr_0 = 4.4538e-04
Loss = 3.0647e-01, PNorm = 68.0265, GNorm = 1.5728, lr_0 = 4.4912e-04
Loss = 2.5806e-01, PNorm = 68.0747, GNorm = 1.1693, lr_0 = 4.5287e-04
Loss = 2.9480e-01, PNorm = 68.1229, GNorm = 1.3830, lr_0 = 4.5662e-04
Loss = 2.6806e-01, PNorm = 68.1626, GNorm = 1.2518, lr_0 = 4.6038e-04
Loss = 2.7965e-01, PNorm = 68.2126, GNorm = 1.3659, lr_0 = 4.6413e-04
Loss = 2.7838e-01, PNorm = 68.2598, GNorm = 0.8839, lr_0 = 4.6787e-04
Loss = 2.7193e-01, PNorm = 68.3038, GNorm = 0.8621, lr_0 = 4.7162e-04
Loss = 2.6452e-01, PNorm = 68.3497, GNorm = 1.2934, lr_0 = 4.7537e-04
Loss = 2.6490e-01, PNorm = 68.3932, GNorm = 1.0038, lr_0 = 4.7913e-04
Loss = 2.3817e-01, PNorm = 68.4417, GNorm = 1.1132, lr_0 = 4.8288e-04
Loss = 2.7247e-01, PNorm = 68.4862, GNorm = 0.9735, lr_0 = 4.8663e-04
Loss = 2.5123e-01, PNorm = 68.5397, GNorm = 1.2358, lr_0 = 4.9038e-04
Loss = 2.4988e-01, PNorm = 68.5856, GNorm = 1.0263, lr_0 = 4.9412e-04
Loss = 2.6876e-01, PNorm = 68.6333, GNorm = 1.0499, lr_0 = 4.9788e-04
Loss = 2.6548e-01, PNorm = 68.6912, GNorm = 1.2200, lr_0 = 5.0163e-04
Loss = 2.7697e-01, PNorm = 68.7377, GNorm = 0.9210, lr_0 = 5.0538e-04
Loss = 2.7082e-01, PNorm = 68.7915, GNorm = 0.9882, lr_0 = 5.0913e-04
Loss = 2.6177e-01, PNorm = 68.8438, GNorm = 0.8437, lr_0 = 5.1287e-04
Loss = 2.5011e-01, PNorm = 68.9058, GNorm = 0.8580, lr_0 = 5.1663e-04
Loss = 2.6005e-01, PNorm = 68.9536, GNorm = 0.9468, lr_0 = 5.2038e-04
Loss = 2.6602e-01, PNorm = 69.0144, GNorm = 1.3004, lr_0 = 5.2413e-04
Loss = 2.2929e-01, PNorm = 69.0679, GNorm = 0.8771, lr_0 = 5.2788e-04
Loss = 2.6632e-01, PNorm = 69.1223, GNorm = 1.0905, lr_0 = 5.3162e-04
Loss = 2.2383e-01, PNorm = 69.1769, GNorm = 0.7308, lr_0 = 5.3538e-04
Loss = 2.2501e-01, PNorm = 69.2336, GNorm = 0.8641, lr_0 = 5.3912e-04
Loss = 2.7680e-01, PNorm = 69.2886, GNorm = 1.7973, lr_0 = 5.4288e-04
Loss = 2.5023e-01, PNorm = 69.3529, GNorm = 0.8034, lr_0 = 5.4663e-04
Loss = 2.5454e-01, PNorm = 69.4211, GNorm = 1.5380, lr_0 = 5.5038e-04
Validation mae = 0.321621
Epoch 1
Loss = 1.7430e-01, PNorm = 69.4843, GNorm = 1.0633, lr_0 = 5.5413e-04
Loss = 1.8803e-01, PNorm = 69.5417, GNorm = 0.6842, lr_0 = 5.5787e-04
Loss = 1.6736e-01, PNorm = 69.5984, GNorm = 0.8697, lr_0 = 5.6163e-04
Loss = 1.6892e-01, PNorm = 69.6543, GNorm = 1.0166, lr_0 = 5.6538e-04
Loss = 1.6805e-01, PNorm = 69.7173, GNorm = 0.9116, lr_0 = 5.6913e-04
Loss = 1.6568e-01, PNorm = 69.7701, GNorm = 1.1717, lr_0 = 5.7288e-04
Loss = 1.5243e-01, PNorm = 69.8312, GNorm = 1.1306, lr_0 = 5.7662e-04
Loss = 1.6458e-01, PNorm = 69.8854, GNorm = 1.0006, lr_0 = 5.8038e-04
Loss = 1.5019e-01, PNorm = 69.9423, GNorm = 0.6628, lr_0 = 5.8413e-04
Loss = 1.6724e-01, PNorm = 70.0117, GNorm = 0.9328, lr_0 = 5.8788e-04
Loss = 1.8408e-01, PNorm = 70.0804, GNorm = 1.0616, lr_0 = 5.9163e-04
Loss = 1.7894e-01, PNorm = 70.1485, GNorm = 0.9468, lr_0 = 5.9538e-04
Loss = 1.9351e-01, PNorm = 70.2249, GNorm = 0.9225, lr_0 = 5.9913e-04
Loss = 1.7476e-01, PNorm = 70.2996, GNorm = 0.7806, lr_0 = 6.0288e-04
Loss = 1.5413e-01, PNorm = 70.3804, GNorm = 0.7268, lr_0 = 6.0663e-04
Loss = 1.8766e-01, PNorm = 70.4523, GNorm = 0.9924, lr_0 = 6.1038e-04
Loss = 2.1819e-01, PNorm = 70.5292, GNorm = 0.9539, lr_0 = 6.1413e-04
Loss = 1.6696e-01, PNorm = 70.6114, GNorm = 0.9420, lr_0 = 6.1788e-04
Loss = 1.9519e-01, PNorm = 70.6850, GNorm = 0.8505, lr_0 = 6.2163e-04
Loss = 2.0960e-01, PNorm = 70.7739, GNorm = 1.6063, lr_0 = 6.2538e-04
Loss = 1.8323e-01, PNorm = 70.8639, GNorm = 0.8228, lr_0 = 6.2913e-04
Loss = 1.7549e-01, PNorm = 70.9580, GNorm = 0.9333, lr_0 = 6.3288e-04
Loss = 1.6859e-01, PNorm = 71.0337, GNorm = 0.9982, lr_0 = 6.3663e-04
Loss = 2.0675e-01, PNorm = 71.1247, GNorm = 0.7633, lr_0 = 6.4038e-04
Loss = 1.7206e-01, PNorm = 71.2116, GNorm = 1.0714, lr_0 = 6.4413e-04
Loss = 1.7349e-01, PNorm = 71.3010, GNorm = 0.8032, lr_0 = 6.4788e-04
Loss = 1.7222e-01, PNorm = 71.3841, GNorm = 1.0836, lr_0 = 6.5163e-04
Loss = 1.8807e-01, PNorm = 71.4749, GNorm = 0.9643, lr_0 = 6.5538e-04
Loss = 1.6665e-01, PNorm = 71.5663, GNorm = 0.8810, lr_0 = 6.5913e-04
Loss = 1.8267e-01, PNorm = 71.6633, GNorm = 1.1239, lr_0 = 6.6288e-04
Loss = 1.8678e-01, PNorm = 71.7539, GNorm = 0.8026, lr_0 = 6.6663e-04
Loss = 1.9709e-01, PNorm = 71.8503, GNorm = 1.0450, lr_0 = 6.7038e-04
Loss = 2.0283e-01, PNorm = 71.9548, GNorm = 1.0979, lr_0 = 6.7413e-04
Loss = 1.8842e-01, PNorm = 72.0500, GNorm = 1.0520, lr_0 = 6.7788e-04
Loss = 1.7398e-01, PNorm = 72.1465, GNorm = 0.8945, lr_0 = 6.8163e-04
Loss = 2.1384e-01, PNorm = 72.2353, GNorm = 0.8095, lr_0 = 6.8538e-04
Loss = 2.0024e-01, PNorm = 72.3345, GNorm = 1.0257, lr_0 = 6.8913e-04
Loss = 1.8921e-01, PNorm = 72.4324, GNorm = 1.0693, lr_0 = 6.9288e-04
Loss = 2.4891e-01, PNorm = 72.5466, GNorm = 0.9214, lr_0 = 6.9663e-04
Loss = 1.9565e-01, PNorm = 72.6553, GNorm = 0.7901, lr_0 = 7.0038e-04
Loss = 1.8845e-01, PNorm = 72.7662, GNorm = 1.2096, lr_0 = 7.0413e-04
Loss = 2.0104e-01, PNorm = 72.8697, GNorm = 0.7795, lr_0 = 7.0788e-04
Loss = 1.7535e-01, PNorm = 72.9681, GNorm = 0.8774, lr_0 = 7.1163e-04
Loss = 2.0577e-01, PNorm = 73.0646, GNorm = 1.5447, lr_0 = 7.1538e-04
Loss = 1.8372e-01, PNorm = 73.1657, GNorm = 1.0891, lr_0 = 7.1913e-04
Loss = 1.7133e-01, PNorm = 73.2627, GNorm = 0.8424, lr_0 = 7.2288e-04
Loss = 1.9560e-01, PNorm = 73.3625, GNorm = 0.9648, lr_0 = 7.2663e-04
Loss = 2.2562e-01, PNorm = 73.4699, GNorm = 1.1425, lr_0 = 7.3038e-04
Loss = 2.0980e-01, PNorm = 73.5713, GNorm = 1.0402, lr_0 = 7.3413e-04
Loss = 2.1097e-01, PNorm = 73.6836, GNorm = 1.1548, lr_0 = 7.3788e-04
Loss = 1.8818e-01, PNorm = 73.7884, GNorm = 0.9731, lr_0 = 7.4163e-04
Loss = 1.9974e-01, PNorm = 73.8877, GNorm = 0.9372, lr_0 = 7.4538e-04
Loss = 1.9671e-01, PNorm = 73.9943, GNorm = 1.0504, lr_0 = 7.4913e-04
Loss = 2.1091e-01, PNorm = 74.1047, GNorm = 0.8281, lr_0 = 7.5288e-04
Loss = 1.9230e-01, PNorm = 74.2255, GNorm = 0.6332, lr_0 = 7.5663e-04
Loss = 1.9570e-01, PNorm = 74.3391, GNorm = 1.0439, lr_0 = 7.6038e-04
Loss = 1.9093e-01, PNorm = 74.4533, GNorm = 0.8638, lr_0 = 7.6413e-04
Loss = 2.2959e-01, PNorm = 74.5673, GNorm = 0.9602, lr_0 = 7.6788e-04
Loss = 2.2874e-01, PNorm = 74.6924, GNorm = 1.3554, lr_0 = 7.7163e-04
Loss = 1.9967e-01, PNorm = 74.8141, GNorm = 0.8730, lr_0 = 7.7538e-04
Loss = 2.0118e-01, PNorm = 74.9409, GNorm = 1.0239, lr_0 = 7.7913e-04
Loss = 2.1369e-01, PNorm = 75.0647, GNorm = 0.8700, lr_0 = 7.8288e-04
Loss = 1.9433e-01, PNorm = 75.1958, GNorm = 0.7230, lr_0 = 7.8663e-04
Loss = 1.9119e-01, PNorm = 75.3187, GNorm = 0.9742, lr_0 = 7.9038e-04
Loss = 2.0918e-01, PNorm = 75.4470, GNorm = 1.0131, lr_0 = 7.9413e-04
Loss = 2.0102e-01, PNorm = 75.5762, GNorm = 0.7040, lr_0 = 7.9788e-04
Loss = 2.2383e-01, PNorm = 75.6977, GNorm = 0.9634, lr_0 = 8.0163e-04
Loss = 2.1516e-01, PNorm = 75.8148, GNorm = 1.0597, lr_0 = 8.0538e-04
Loss = 2.1499e-01, PNorm = 75.9409, GNorm = 0.9818, lr_0 = 8.0913e-04
Loss = 1.9239e-01, PNorm = 76.0644, GNorm = 0.6521, lr_0 = 8.1288e-04
Loss = 2.1834e-01, PNorm = 76.1777, GNorm = 1.0660, lr_0 = 8.1663e-04
Loss = 1.6665e-01, PNorm = 76.3034, GNorm = 0.6382, lr_0 = 8.2038e-04
Loss = 2.1365e-01, PNorm = 76.4232, GNorm = 0.7390, lr_0 = 8.2413e-04
Loss = 2.0495e-01, PNorm = 76.5529, GNorm = 0.9233, lr_0 = 8.2788e-04
Loss = 2.0169e-01, PNorm = 76.6937, GNorm = 0.9852, lr_0 = 8.3163e-04
Loss = 2.2083e-01, PNorm = 76.8330, GNorm = 0.8797, lr_0 = 8.3538e-04
Loss = 2.0897e-01, PNorm = 76.9764, GNorm = 0.9411, lr_0 = 8.3913e-04
Loss = 2.1937e-01, PNorm = 77.1132, GNorm = 1.0355, lr_0 = 8.4288e-04
Loss = 2.0654e-01, PNorm = 77.2604, GNorm = 1.0087, lr_0 = 8.4663e-04
Loss = 2.1590e-01, PNorm = 77.3986, GNorm = 0.7507, lr_0 = 8.5038e-04
Loss = 2.0660e-01, PNorm = 77.5419, GNorm = 0.9422, lr_0 = 8.5413e-04
Loss = 2.2250e-01, PNorm = 77.6782, GNorm = 1.6543, lr_0 = 8.5788e-04
Loss = 2.2692e-01, PNorm = 77.8314, GNorm = 0.8828, lr_0 = 8.6163e-04
Loss = 2.0777e-01, PNorm = 77.9751, GNorm = 1.0516, lr_0 = 8.6538e-04
Loss = 2.1837e-01, PNorm = 78.1058, GNorm = 0.7823, lr_0 = 8.6913e-04
Loss = 2.0609e-01, PNorm = 78.2350, GNorm = 0.6009, lr_0 = 8.7288e-04
Loss = 2.0438e-01, PNorm = 78.3690, GNorm = 0.8411, lr_0 = 8.7663e-04
Loss = 2.0063e-01, PNorm = 78.4978, GNorm = 0.6222, lr_0 = 8.8038e-04
Loss = 1.9114e-01, PNorm = 78.6175, GNorm = 0.8147, lr_0 = 8.8413e-04
Loss = 2.3099e-01, PNorm = 78.7491, GNorm = 0.7493, lr_0 = 8.8788e-04
Loss = 1.9529e-01, PNorm = 78.8862, GNorm = 1.0539, lr_0 = 8.9163e-04
Loss = 2.1121e-01, PNorm = 79.0171, GNorm = 0.6857, lr_0 = 8.9538e-04
Loss = 1.9506e-01, PNorm = 79.1494, GNorm = 0.9716, lr_0 = 8.9913e-04
Loss = 2.2608e-01, PNorm = 79.2825, GNorm = 0.7979, lr_0 = 9.0288e-04
Loss = 2.1511e-01, PNorm = 79.4291, GNorm = 0.9615, lr_0 = 9.0663e-04
Loss = 2.2211e-01, PNorm = 79.5664, GNorm = 0.9087, lr_0 = 9.1038e-04
Loss = 2.2073e-01, PNorm = 79.7213, GNorm = 1.0337, lr_0 = 9.1413e-04
Loss = 2.2605e-01, PNorm = 79.8567, GNorm = 0.9079, lr_0 = 9.1788e-04
Loss = 2.1982e-01, PNorm = 79.9991, GNorm = 1.3899, lr_0 = 9.2163e-04
Loss = 2.1183e-01, PNorm = 80.1246, GNorm = 0.6527, lr_0 = 9.2538e-04
Loss = 1.9453e-01, PNorm = 80.2462, GNorm = 0.7449, lr_0 = 9.2913e-04
Loss = 2.1383e-01, PNorm = 80.3763, GNorm = 0.6955, lr_0 = 9.3288e-04
Loss = 1.9782e-01, PNorm = 80.4972, GNorm = 0.8869, lr_0 = 9.3663e-04
Loss = 1.9680e-01, PNorm = 80.6212, GNorm = 0.8565, lr_0 = 9.4038e-04
Loss = 2.0501e-01, PNorm = 80.7555, GNorm = 0.7713, lr_0 = 9.4413e-04
Loss = 2.0152e-01, PNorm = 80.8730, GNorm = 1.1015, lr_0 = 9.4788e-04
Loss = 2.1461e-01, PNorm = 81.0179, GNorm = 1.6428, lr_0 = 9.5163e-04
Loss = 1.9661e-01, PNorm = 81.1727, GNorm = 0.5194, lr_0 = 9.5538e-04
Loss = 2.3167e-01, PNorm = 81.3282, GNorm = 0.6489, lr_0 = 9.5913e-04
Loss = 2.0270e-01, PNorm = 81.4891, GNorm = 0.8791, lr_0 = 9.6288e-04
Loss = 2.1819e-01, PNorm = 81.6255, GNorm = 0.9981, lr_0 = 9.6663e-04
Loss = 2.1498e-01, PNorm = 81.7732, GNorm = 1.1142, lr_0 = 9.7038e-04
Loss = 2.4099e-01, PNorm = 81.9182, GNorm = 0.6150, lr_0 = 9.7413e-04
Loss = 2.1388e-01, PNorm = 82.0728, GNorm = 0.7342, lr_0 = 9.7788e-04
Loss = 2.1052e-01, PNorm = 82.2142, GNorm = 0.9730, lr_0 = 9.8163e-04
Loss = 2.2439e-01, PNorm = 82.3592, GNorm = 1.0104, lr_0 = 9.8537e-04
Loss = 2.4346e-01, PNorm = 82.5120, GNorm = 1.3678, lr_0 = 9.8912e-04
Loss = 2.0632e-01, PNorm = 82.6540, GNorm = 0.5812, lr_0 = 9.9288e-04
Loss = 1.7705e-01, PNorm = 82.7973, GNorm = 0.9783, lr_0 = 9.9663e-04
Loss = 1.9955e-01, PNorm = 82.9181, GNorm = 0.5597, lr_0 = 9.9993e-04
Validation mae = 0.308223
Epoch 2
Loss = 1.4274e-01, PNorm = 83.0528, GNorm = 0.8041, lr_0 = 9.9925e-04
Loss = 1.3155e-01, PNorm = 83.1631, GNorm = 0.8135, lr_0 = 9.9856e-04
Loss = 1.4218e-01, PNorm = 83.2754, GNorm = 0.3827, lr_0 = 9.9788e-04
Loss = 1.2817e-01, PNorm = 83.3713, GNorm = 0.5471, lr_0 = 9.9719e-04
Loss = 1.3563e-01, PNorm = 83.4812, GNorm = 0.5063, lr_0 = 9.9651e-04
Loss = 1.3122e-01, PNorm = 83.5813, GNorm = 1.0378, lr_0 = 9.9583e-04
Loss = 1.2956e-01, PNorm = 83.7181, GNorm = 0.5602, lr_0 = 9.9515e-04
Loss = 1.3667e-01, PNorm = 83.8340, GNorm = 0.7378, lr_0 = 9.9446e-04
Loss = 1.0616e-01, PNorm = 83.9599, GNorm = 0.6325, lr_0 = 9.9378e-04
Loss = 1.2517e-01, PNorm = 84.0623, GNorm = 0.5591, lr_0 = 9.9310e-04
Loss = 1.3158e-01, PNorm = 84.1864, GNorm = 0.9414, lr_0 = 9.9242e-04
Loss = 1.3938e-01, PNorm = 84.3088, GNorm = 0.5901, lr_0 = 9.9174e-04
Loss = 1.2020e-01, PNorm = 84.4318, GNorm = 0.5267, lr_0 = 9.9106e-04
Loss = 1.2979e-01, PNorm = 84.5597, GNorm = 0.5648, lr_0 = 9.9038e-04
Loss = 1.2719e-01, PNorm = 84.6865, GNorm = 0.7586, lr_0 = 9.8971e-04
Loss = 1.1928e-01, PNorm = 84.8165, GNorm = 0.6333, lr_0 = 9.8903e-04
Loss = 1.2113e-01, PNorm = 84.9301, GNorm = 0.4056, lr_0 = 9.8835e-04
Loss = 1.0127e-01, PNorm = 85.0449, GNorm = 0.6359, lr_0 = 9.8767e-04
Loss = 1.1860e-01, PNorm = 85.1553, GNorm = 0.7344, lr_0 = 9.8700e-04
Loss = 1.2349e-01, PNorm = 85.2707, GNorm = 1.0785, lr_0 = 9.8632e-04
Loss = 1.3657e-01, PNorm = 85.3982, GNorm = 0.5136, lr_0 = 9.8564e-04
Loss = 1.3607e-01, PNorm = 85.5179, GNorm = 0.5535, lr_0 = 9.8497e-04
Loss = 1.3357e-01, PNorm = 85.6577, GNorm = 0.7449, lr_0 = 9.8429e-04
Loss = 1.2718e-01, PNorm = 85.7692, GNorm = 0.6265, lr_0 = 9.8362e-04
Loss = 1.1807e-01, PNorm = 85.8878, GNorm = 0.7425, lr_0 = 9.8295e-04
Loss = 1.4652e-01, PNorm = 86.0045, GNorm = 0.8356, lr_0 = 9.8227e-04
Loss = 1.3603e-01, PNorm = 86.1266, GNorm = 0.6181, lr_0 = 9.8160e-04
Loss = 1.2382e-01, PNorm = 86.2549, GNorm = 0.4982, lr_0 = 9.8093e-04
Loss = 1.1312e-01, PNorm = 86.3840, GNorm = 0.5571, lr_0 = 9.8026e-04
Loss = 1.3322e-01, PNorm = 86.5040, GNorm = 0.5121, lr_0 = 9.7958e-04
Loss = 1.2659e-01, PNorm = 86.6277, GNorm = 0.4745, lr_0 = 9.7891e-04
Loss = 1.4743e-01, PNorm = 86.7596, GNorm = 0.9697, lr_0 = 9.7824e-04
Loss = 1.3819e-01, PNorm = 86.8841, GNorm = 0.6266, lr_0 = 9.7757e-04
Loss = 1.5067e-01, PNorm = 87.0192, GNorm = 0.9210, lr_0 = 9.7690e-04
Loss = 1.3884e-01, PNorm = 87.1514, GNorm = 0.6796, lr_0 = 9.7623e-04
Loss = 1.3905e-01, PNorm = 87.2747, GNorm = 1.0476, lr_0 = 9.7556e-04
Loss = 1.2037e-01, PNorm = 87.3949, GNorm = 0.5539, lr_0 = 9.7490e-04
Loss = 1.2728e-01, PNorm = 87.5096, GNorm = 0.4812, lr_0 = 9.7423e-04
Loss = 1.5780e-01, PNorm = 87.6296, GNorm = 0.6712, lr_0 = 9.7356e-04
Loss = 1.5466e-01, PNorm = 87.7732, GNorm = 0.6099, lr_0 = 9.7289e-04
Loss = 1.3449e-01, PNorm = 87.9007, GNorm = 0.6854, lr_0 = 9.7223e-04
Loss = 1.4533e-01, PNorm = 88.0359, GNorm = 0.9138, lr_0 = 9.7156e-04
Loss = 1.3144e-01, PNorm = 88.1519, GNorm = 1.0543, lr_0 = 9.7090e-04
Loss = 1.2135e-01, PNorm = 88.2801, GNorm = 1.0755, lr_0 = 9.7023e-04
Loss = 1.3056e-01, PNorm = 88.4176, GNorm = 0.7514, lr_0 = 9.6957e-04
Loss = 1.4496e-01, PNorm = 88.5408, GNorm = 0.9482, lr_0 = 9.6890e-04
Loss = 1.3466e-01, PNorm = 88.6721, GNorm = 1.0897, lr_0 = 9.6824e-04
Loss = 1.4121e-01, PNorm = 88.8036, GNorm = 0.7230, lr_0 = 9.6757e-04
Loss = 1.4673e-01, PNorm = 88.9394, GNorm = 1.3101, lr_0 = 9.6691e-04
Loss = 1.2364e-01, PNorm = 89.0806, GNorm = 0.6556, lr_0 = 9.6625e-04
Loss = 1.3490e-01, PNorm = 89.2217, GNorm = 0.6706, lr_0 = 9.6559e-04
Loss = 1.3250e-01, PNorm = 89.3607, GNorm = 0.6799, lr_0 = 9.6493e-04
Loss = 1.2917e-01, PNorm = 89.4866, GNorm = 0.6968, lr_0 = 9.6427e-04
Loss = 1.2841e-01, PNorm = 89.6143, GNorm = 0.8444, lr_0 = 9.6360e-04
Loss = 1.3507e-01, PNorm = 89.7445, GNorm = 0.8838, lr_0 = 9.6294e-04
Loss = 1.4509e-01, PNorm = 89.8668, GNorm = 0.9721, lr_0 = 9.6228e-04
Loss = 1.4493e-01, PNorm = 90.0000, GNorm = 0.8986, lr_0 = 9.6163e-04
Loss = 1.5976e-01, PNorm = 90.1325, GNorm = 0.9435, lr_0 = 9.6097e-04
Loss = 1.2464e-01, PNorm = 90.2788, GNorm = 0.7258, lr_0 = 9.6031e-04
Loss = 1.4599e-01, PNorm = 90.4070, GNorm = 0.5407, lr_0 = 9.5965e-04
Loss = 1.3070e-01, PNorm = 90.5377, GNorm = 0.7355, lr_0 = 9.5899e-04
Loss = 1.3312e-01, PNorm = 90.6664, GNorm = 0.4496, lr_0 = 9.5834e-04
Loss = 1.4854e-01, PNorm = 90.7902, GNorm = 0.5193, lr_0 = 9.5768e-04
Loss = 1.3056e-01, PNorm = 90.9099, GNorm = 0.5940, lr_0 = 9.5702e-04
Loss = 1.2156e-01, PNorm = 91.0382, GNorm = 1.0024, lr_0 = 9.5637e-04
Loss = 1.3311e-01, PNorm = 91.1617, GNorm = 0.5926, lr_0 = 9.5571e-04
Loss = 1.1802e-01, PNorm = 91.2892, GNorm = 0.7277, lr_0 = 9.5506e-04
Loss = 1.3534e-01, PNorm = 91.4112, GNorm = 0.8336, lr_0 = 9.5440e-04
Loss = 1.4685e-01, PNorm = 91.5389, GNorm = 0.5635, lr_0 = 9.5375e-04
Loss = 1.4428e-01, PNorm = 91.6651, GNorm = 0.8736, lr_0 = 9.5310e-04
Loss = 1.2317e-01, PNorm = 91.7866, GNorm = 0.4870, lr_0 = 9.5244e-04
Loss = 1.4292e-01, PNorm = 91.9041, GNorm = 0.4778, lr_0 = 9.5179e-04
Loss = 1.3986e-01, PNorm = 92.0328, GNorm = 0.5093, lr_0 = 9.5114e-04
Loss = 1.3495e-01, PNorm = 92.1615, GNorm = 0.6261, lr_0 = 9.5049e-04
Loss = 1.4410e-01, PNorm = 92.2908, GNorm = 1.4466, lr_0 = 9.4984e-04
Loss = 1.5290e-01, PNorm = 92.4275, GNorm = 0.9616, lr_0 = 9.4919e-04
Loss = 1.4040e-01, PNorm = 92.5577, GNorm = 0.6534, lr_0 = 9.4854e-04
Loss = 1.3437e-01, PNorm = 92.6831, GNorm = 0.4929, lr_0 = 9.4789e-04
Loss = 1.3578e-01, PNorm = 92.8109, GNorm = 0.7861, lr_0 = 9.4724e-04
Loss = 1.4528e-01, PNorm = 92.9323, GNorm = 0.7115, lr_0 = 9.4659e-04
Loss = 1.2539e-01, PNorm = 93.0570, GNorm = 0.8392, lr_0 = 9.4594e-04
Loss = 1.5151e-01, PNorm = 93.1756, GNorm = 1.1721, lr_0 = 9.4529e-04
Loss = 1.4663e-01, PNorm = 93.2873, GNorm = 0.5307, lr_0 = 9.4464e-04
Loss = 1.4870e-01, PNorm = 93.4094, GNorm = 0.6160, lr_0 = 9.4400e-04
Loss = 1.2778e-01, PNorm = 93.5364, GNorm = 0.7226, lr_0 = 9.4335e-04
Loss = 1.4176e-01, PNorm = 93.6532, GNorm = 0.7159, lr_0 = 9.4270e-04
Loss = 1.4502e-01, PNorm = 93.7890, GNorm = 0.5263, lr_0 = 9.4206e-04
Loss = 1.7506e-01, PNorm = 93.9273, GNorm = 0.5714, lr_0 = 9.4141e-04
Loss = 1.3605e-01, PNorm = 94.0707, GNorm = 0.8672, lr_0 = 9.4077e-04
Loss = 1.4661e-01, PNorm = 94.1921, GNorm = 1.0943, lr_0 = 9.4012e-04
Loss = 1.4883e-01, PNorm = 94.3219, GNorm = 1.0356, lr_0 = 9.3948e-04
Loss = 1.4556e-01, PNorm = 94.4522, GNorm = 0.4891, lr_0 = 9.3884e-04
Loss = 1.5262e-01, PNorm = 94.6007, GNorm = 1.0908, lr_0 = 9.3819e-04
Loss = 1.4308e-01, PNorm = 94.7279, GNorm = 1.0094, lr_0 = 9.3755e-04
Loss = 1.4351e-01, PNorm = 94.8643, GNorm = 0.3712, lr_0 = 9.3691e-04
Loss = 1.4239e-01, PNorm = 94.9936, GNorm = 0.6255, lr_0 = 9.3627e-04
Loss = 1.6487e-01, PNorm = 95.1178, GNorm = 1.0022, lr_0 = 9.3562e-04
Loss = 1.3653e-01, PNorm = 95.2686, GNorm = 0.7076, lr_0 = 9.3498e-04
Loss = 1.4757e-01, PNorm = 95.3960, GNorm = 0.8448, lr_0 = 9.3434e-04
Loss = 1.4400e-01, PNorm = 95.5215, GNorm = 1.0213, lr_0 = 9.3370e-04
Loss = 1.5971e-01, PNorm = 95.6434, GNorm = 0.9092, lr_0 = 9.3306e-04
Loss = 1.4430e-01, PNorm = 95.7735, GNorm = 0.6118, lr_0 = 9.3242e-04
Loss = 1.4525e-01, PNorm = 95.9102, GNorm = 1.1045, lr_0 = 9.3178e-04
Loss = 1.5138e-01, PNorm = 96.0322, GNorm = 0.5285, lr_0 = 9.3115e-04
Loss = 1.5617e-01, PNorm = 96.1734, GNorm = 1.0558, lr_0 = 9.3051e-04
Loss = 1.4661e-01, PNorm = 96.3150, GNorm = 0.4181, lr_0 = 9.2987e-04
Loss = 1.4265e-01, PNorm = 96.4437, GNorm = 0.5577, lr_0 = 9.2923e-04
Loss = 1.3401e-01, PNorm = 96.5737, GNorm = 1.1080, lr_0 = 9.2860e-04
Loss = 1.4730e-01, PNorm = 96.6999, GNorm = 0.9189, lr_0 = 9.2796e-04
Loss = 1.4404e-01, PNorm = 96.8283, GNorm = 0.6381, lr_0 = 9.2733e-04
Loss = 1.6208e-01, PNorm = 96.9364, GNorm = 0.7182, lr_0 = 9.2669e-04
Loss = 1.7161e-01, PNorm = 97.0663, GNorm = 1.1618, lr_0 = 9.2606e-04
Loss = 1.4435e-01, PNorm = 97.1923, GNorm = 0.6492, lr_0 = 9.2542e-04
Loss = 1.3193e-01, PNorm = 97.3252, GNorm = 1.0604, lr_0 = 9.2479e-04
Loss = 1.3957e-01, PNorm = 97.4441, GNorm = 0.9798, lr_0 = 9.2415e-04
Loss = 1.4559e-01, PNorm = 97.5747, GNorm = 0.9452, lr_0 = 9.2352e-04
Loss = 1.4571e-01, PNorm = 97.6954, GNorm = 0.5637, lr_0 = 9.2289e-04
Loss = 1.2738e-01, PNorm = 97.8170, GNorm = 0.8077, lr_0 = 9.2226e-04
Loss = 1.4780e-01, PNorm = 97.9331, GNorm = 0.7139, lr_0 = 9.2162e-04
Loss = 1.4664e-01, PNorm = 98.0576, GNorm = 0.6668, lr_0 = 9.2099e-04
Validation mae = 0.300078
Epoch 3
Loss = 7.5361e-02, PNorm = 98.1682, GNorm = 0.3590, lr_0 = 9.2036e-04
Loss = 8.3817e-02, PNorm = 98.2616, GNorm = 0.3027, lr_0 = 9.1973e-04
Loss = 7.8640e-02, PNorm = 98.3519, GNorm = 0.8860, lr_0 = 9.1910e-04
Loss = 8.4110e-02, PNorm = 98.4368, GNorm = 0.7248, lr_0 = 9.1847e-04
Loss = 8.2589e-02, PNorm = 98.5184, GNorm = 0.3954, lr_0 = 9.1784e-04
Loss = 7.4918e-02, PNorm = 98.5939, GNorm = 0.3566, lr_0 = 9.1721e-04
Loss = 7.5449e-02, PNorm = 98.6736, GNorm = 0.4366, lr_0 = 9.1658e-04
Loss = 8.6168e-02, PNorm = 98.7470, GNorm = 0.6002, lr_0 = 9.1596e-04
Loss = 7.8124e-02, PNorm = 98.8164, GNorm = 0.5389, lr_0 = 9.1533e-04
Loss = 7.7071e-02, PNorm = 98.9014, GNorm = 0.4317, lr_0 = 9.1470e-04
Loss = 9.2197e-02, PNorm = 98.9893, GNorm = 0.4817, lr_0 = 9.1408e-04
Loss = 7.6967e-02, PNorm = 99.0729, GNorm = 0.4613, lr_0 = 9.1345e-04
Loss = 7.1633e-02, PNorm = 99.1562, GNorm = 0.4329, lr_0 = 9.1282e-04
Loss = 8.2765e-02, PNorm = 99.2302, GNorm = 0.6150, lr_0 = 9.1220e-04
Loss = 8.3654e-02, PNorm = 99.3079, GNorm = 0.6633, lr_0 = 9.1157e-04
Loss = 7.5045e-02, PNorm = 99.3892, GNorm = 0.5431, lr_0 = 9.1095e-04
Loss = 7.3211e-02, PNorm = 99.4627, GNorm = 0.5073, lr_0 = 9.1032e-04
Loss = 9.3461e-02, PNorm = 99.5299, GNorm = 0.8129, lr_0 = 9.0970e-04
Loss = 8.3193e-02, PNorm = 99.6134, GNorm = 1.0584, lr_0 = 9.0908e-04
Loss = 7.4115e-02, PNorm = 99.7015, GNorm = 0.5298, lr_0 = 9.0846e-04
Loss = 7.2950e-02, PNorm = 99.7764, GNorm = 0.5178, lr_0 = 9.0783e-04
Loss = 7.7546e-02, PNorm = 99.8602, GNorm = 0.4175, lr_0 = 9.0721e-04
Loss = 7.7176e-02, PNorm = 99.9304, GNorm = 1.1702, lr_0 = 9.0659e-04
Loss = 7.0775e-02, PNorm = 100.0073, GNorm = 0.4672, lr_0 = 9.0597e-04
Loss = 8.9077e-02, PNorm = 100.0774, GNorm = 0.4071, lr_0 = 9.0535e-04
Loss = 7.6880e-02, PNorm = 100.1769, GNorm = 0.7940, lr_0 = 9.0473e-04
Loss = 8.1473e-02, PNorm = 100.2536, GNorm = 1.0841, lr_0 = 9.0411e-04
Loss = 8.7807e-02, PNorm = 100.3516, GNorm = 1.5411, lr_0 = 9.0349e-04
Loss = 7.8076e-02, PNorm = 100.4349, GNorm = 0.4554, lr_0 = 9.0287e-04
Loss = 7.0321e-02, PNorm = 100.5232, GNorm = 0.3603, lr_0 = 9.0225e-04
Loss = 7.4017e-02, PNorm = 100.6096, GNorm = 0.4480, lr_0 = 9.0163e-04
Loss = 7.3337e-02, PNorm = 100.6783, GNorm = 0.3952, lr_0 = 9.0102e-04
Loss = 8.2916e-02, PNorm = 100.7614, GNorm = 1.0220, lr_0 = 9.0040e-04
Loss = 7.4478e-02, PNorm = 100.8453, GNorm = 0.4546, lr_0 = 8.9978e-04
Loss = 7.6057e-02, PNorm = 100.9268, GNorm = 0.8570, lr_0 = 8.9916e-04
Loss = 9.2168e-02, PNorm = 101.0056, GNorm = 0.5331, lr_0 = 8.9855e-04
Loss = 7.0016e-02, PNorm = 101.0979, GNorm = 0.3906, lr_0 = 8.9793e-04
Loss = 8.6998e-02, PNorm = 101.1807, GNorm = 0.4268, lr_0 = 8.9732e-04
Loss = 8.2387e-02, PNorm = 101.2675, GNorm = 0.6324, lr_0 = 8.9670e-04
Loss = 8.4341e-02, PNorm = 101.3550, GNorm = 0.3245, lr_0 = 8.9609e-04
Loss = 7.9634e-02, PNorm = 101.4526, GNorm = 0.6188, lr_0 = 8.9548e-04
Loss = 7.4560e-02, PNorm = 101.5351, GNorm = 0.5288, lr_0 = 8.9486e-04
Loss = 8.7127e-02, PNorm = 101.6380, GNorm = 0.4286, lr_0 = 8.9425e-04
Loss = 9.4577e-02, PNorm = 101.7323, GNorm = 1.1156, lr_0 = 8.9364e-04
Loss = 8.5859e-02, PNorm = 101.8371, GNorm = 0.5802, lr_0 = 8.9302e-04
Loss = 8.2133e-02, PNorm = 101.9369, GNorm = 1.1511, lr_0 = 8.9241e-04
Loss = 1.0325e-01, PNorm = 102.0285, GNorm = 0.8200, lr_0 = 8.9180e-04
Loss = 1.0206e-01, PNorm = 102.1275, GNorm = 0.8574, lr_0 = 8.9119e-04
Loss = 9.6107e-02, PNorm = 102.2443, GNorm = 0.3889, lr_0 = 8.9058e-04
Loss = 8.6003e-02, PNorm = 102.3478, GNorm = 0.6624, lr_0 = 8.8997e-04
Loss = 7.4617e-02, PNorm = 102.4502, GNorm = 0.5627, lr_0 = 8.8936e-04
Loss = 8.8338e-02, PNorm = 102.5394, GNorm = 0.8217, lr_0 = 8.8875e-04
Loss = 8.2139e-02, PNorm = 102.6352, GNorm = 0.6368, lr_0 = 8.8814e-04
Loss = 1.0033e-01, PNorm = 102.7324, GNorm = 0.5803, lr_0 = 8.8753e-04
Loss = 9.2133e-02, PNorm = 102.8300, GNorm = 0.7934, lr_0 = 8.8693e-04
Loss = 8.7804e-02, PNorm = 102.9317, GNorm = 0.7070, lr_0 = 8.8632e-04
Loss = 9.8734e-02, PNorm = 103.0310, GNorm = 1.1934, lr_0 = 8.8571e-04
Loss = 9.2105e-02, PNorm = 103.1329, GNorm = 0.7252, lr_0 = 8.8510e-04
Loss = 8.7304e-02, PNorm = 103.2438, GNorm = 0.4354, lr_0 = 8.8450e-04
Loss = 9.1682e-02, PNorm = 103.3541, GNorm = 0.3801, lr_0 = 8.8389e-04
Loss = 8.9013e-02, PNorm = 103.4588, GNorm = 1.1261, lr_0 = 8.8329e-04
Loss = 8.4949e-02, PNorm = 103.5720, GNorm = 1.0681, lr_0 = 8.8268e-04
Loss = 1.0916e-01, PNorm = 103.6777, GNorm = 0.4933, lr_0 = 8.8208e-04
Loss = 9.0591e-02, PNorm = 103.7890, GNorm = 0.4662, lr_0 = 8.8147e-04
Loss = 1.0800e-01, PNorm = 103.9070, GNorm = 0.6441, lr_0 = 8.8087e-04
Loss = 8.5151e-02, PNorm = 104.0177, GNorm = 1.1224, lr_0 = 8.8026e-04
Loss = 9.1204e-02, PNorm = 104.1388, GNorm = 1.0131, lr_0 = 8.7966e-04
Loss = 8.0848e-02, PNorm = 104.2331, GNorm = 0.4236, lr_0 = 8.7906e-04
Loss = 9.9209e-02, PNorm = 104.3341, GNorm = 0.3815, lr_0 = 8.7846e-04
Loss = 9.7869e-02, PNorm = 104.4333, GNorm = 0.5144, lr_0 = 8.7785e-04
Loss = 8.0646e-02, PNorm = 104.5346, GNorm = 1.1464, lr_0 = 8.7725e-04
Loss = 7.7316e-02, PNorm = 104.6216, GNorm = 0.3126, lr_0 = 8.7665e-04
Loss = 8.2455e-02, PNorm = 104.7119, GNorm = 0.4170, lr_0 = 8.7605e-04
Loss = 9.1991e-02, PNorm = 104.8022, GNorm = 0.4583, lr_0 = 8.7545e-04
Loss = 8.8229e-02, PNorm = 104.9099, GNorm = 0.4986, lr_0 = 8.7485e-04
Loss = 8.1770e-02, PNorm = 105.0127, GNorm = 0.4552, lr_0 = 8.7425e-04
Loss = 8.6564e-02, PNorm = 105.1106, GNorm = 0.8253, lr_0 = 8.7365e-04
Loss = 8.7382e-02, PNorm = 105.2214, GNorm = 1.3804, lr_0 = 8.7306e-04
Loss = 9.5811e-02, PNorm = 105.3210, GNorm = 0.5115, lr_0 = 8.7246e-04
Loss = 8.7919e-02, PNorm = 105.4274, GNorm = 0.5716, lr_0 = 8.7186e-04
Loss = 1.0630e-01, PNorm = 105.5396, GNorm = 0.4396, lr_0 = 8.7126e-04
Loss = 9.5365e-02, PNorm = 105.6667, GNorm = 0.4377, lr_0 = 8.7067e-04
Loss = 9.5739e-02, PNorm = 105.7857, GNorm = 0.6676, lr_0 = 8.7007e-04
Loss = 9.3760e-02, PNorm = 105.9017, GNorm = 0.5410, lr_0 = 8.6947e-04
Loss = 9.5261e-02, PNorm = 106.0228, GNorm = 0.6072, lr_0 = 8.6888e-04
Loss = 9.4443e-02, PNorm = 106.1366, GNorm = 0.3937, lr_0 = 8.6828e-04
Loss = 1.0706e-01, PNorm = 106.2621, GNorm = 0.3884, lr_0 = 8.6769e-04
Loss = 8.7330e-02, PNorm = 106.3806, GNorm = 0.5502, lr_0 = 8.6709e-04
Loss = 9.1111e-02, PNorm = 106.4924, GNorm = 0.6435, lr_0 = 8.6650e-04
Loss = 1.1339e-01, PNorm = 106.6041, GNorm = 0.8997, lr_0 = 8.6590e-04
Loss = 9.7389e-02, PNorm = 106.7214, GNorm = 0.3782, lr_0 = 8.6531e-04
Loss = 8.6202e-02, PNorm = 106.8332, GNorm = 0.4775, lr_0 = 8.6472e-04
Loss = 8.3036e-02, PNorm = 106.9331, GNorm = 0.4348, lr_0 = 8.6413e-04
Loss = 9.4540e-02, PNorm = 107.0438, GNorm = 1.0468, lr_0 = 8.6353e-04
Loss = 9.7552e-02, PNorm = 107.1540, GNorm = 0.4681, lr_0 = 8.6294e-04
Loss = 9.3062e-02, PNorm = 107.2732, GNorm = 0.5179, lr_0 = 8.6235e-04
Loss = 8.5566e-02, PNorm = 107.3739, GNorm = 0.4264, lr_0 = 8.6176e-04
Loss = 9.4899e-02, PNorm = 107.4744, GNorm = 0.7822, lr_0 = 8.6117e-04
Loss = 8.5605e-02, PNorm = 107.5798, GNorm = 0.6874, lr_0 = 8.6058e-04
Loss = 1.0088e-01, PNorm = 107.6943, GNorm = 0.3578, lr_0 = 8.5999e-04
Loss = 1.0484e-01, PNorm = 107.8091, GNorm = 1.0125, lr_0 = 8.5940e-04
Loss = 1.1037e-01, PNorm = 107.9222, GNorm = 0.6650, lr_0 = 8.5881e-04
Loss = 9.0752e-02, PNorm = 108.0404, GNorm = 0.6158, lr_0 = 8.5823e-04
Loss = 1.0286e-01, PNorm = 108.1638, GNorm = 0.7659, lr_0 = 8.5764e-04
Loss = 7.8590e-02, PNorm = 108.2820, GNorm = 0.5424, lr_0 = 8.5705e-04
Loss = 8.8624e-02, PNorm = 108.3893, GNorm = 0.5174, lr_0 = 8.5646e-04
Loss = 9.4969e-02, PNorm = 108.4913, GNorm = 0.5203, lr_0 = 8.5588e-04
Loss = 9.7814e-02, PNorm = 108.6073, GNorm = 0.8278, lr_0 = 8.5529e-04
Loss = 8.4947e-02, PNorm = 108.7200, GNorm = 0.5140, lr_0 = 8.5470e-04
Loss = 9.3517e-02, PNorm = 108.8221, GNorm = 0.6279, lr_0 = 8.5412e-04
Loss = 1.0142e-01, PNorm = 108.9408, GNorm = 0.7934, lr_0 = 8.5353e-04
Loss = 8.5941e-02, PNorm = 109.0415, GNorm = 0.7661, lr_0 = 8.5295e-04
Loss = 1.0131e-01, PNorm = 109.1597, GNorm = 0.7411, lr_0 = 8.5236e-04
Loss = 9.9346e-02, PNorm = 109.2725, GNorm = 0.5673, lr_0 = 8.5178e-04
Loss = 9.5474e-02, PNorm = 109.3987, GNorm = 0.4709, lr_0 = 8.5120e-04
Loss = 9.0414e-02, PNorm = 109.5216, GNorm = 0.4972, lr_0 = 8.5061e-04
Loss = 9.0903e-02, PNorm = 109.6361, GNorm = 0.6915, lr_0 = 8.5003e-04
Loss = 8.7038e-02, PNorm = 109.7526, GNorm = 0.7896, lr_0 = 8.4945e-04
Loss = 1.1448e-01, PNorm = 109.8477, GNorm = 1.3862, lr_0 = 8.4887e-04
Loss = 1.0537e-01, PNorm = 109.9678, GNorm = 0.7103, lr_0 = 8.4828e-04
Validation mae = 0.296020
Epoch 4
Loss = 7.0243e-02, PNorm = 110.0729, GNorm = 1.0525, lr_0 = 8.4770e-04
Loss = 6.4737e-02, PNorm = 110.1688, GNorm = 0.6135, lr_0 = 8.4712e-04
Loss = 6.2972e-02, PNorm = 110.2477, GNorm = 0.9764, lr_0 = 8.4654e-04
Loss = 6.4674e-02, PNorm = 110.3154, GNorm = 1.0202, lr_0 = 8.4596e-04
Loss = 5.9901e-02, PNorm = 110.3877, GNorm = 0.8533, lr_0 = 8.4538e-04
Loss = 6.2961e-02, PNorm = 110.4561, GNorm = 0.5618, lr_0 = 8.4480e-04
Loss = 6.0034e-02, PNorm = 110.5291, GNorm = 0.4625, lr_0 = 8.4423e-04
Loss = 5.8197e-02, PNorm = 110.5971, GNorm = 0.5504, lr_0 = 8.4365e-04
Loss = 6.2060e-02, PNorm = 110.6574, GNorm = 0.7299, lr_0 = 8.4307e-04
Loss = 5.9865e-02, PNorm = 110.7230, GNorm = 0.4786, lr_0 = 8.4249e-04
Loss = 5.4230e-02, PNorm = 110.7834, GNorm = 0.2442, lr_0 = 8.4191e-04
Loss = 5.4309e-02, PNorm = 110.8488, GNorm = 0.3012, lr_0 = 8.4134e-04
Loss = 5.1709e-02, PNorm = 110.9081, GNorm = 0.3706, lr_0 = 8.4076e-04
Loss = 5.1436e-02, PNorm = 110.9652, GNorm = 0.3466, lr_0 = 8.4019e-04
Loss = 5.3278e-02, PNorm = 111.0295, GNorm = 0.8517, lr_0 = 8.3961e-04
Loss = 4.9430e-02, PNorm = 111.0938, GNorm = 0.3595, lr_0 = 8.3903e-04
Loss = 5.6204e-02, PNorm = 111.1609, GNorm = 0.4313, lr_0 = 8.3846e-04
Loss = 4.9263e-02, PNorm = 111.2342, GNorm = 0.7437, lr_0 = 8.3789e-04
Loss = 4.7458e-02, PNorm = 111.2919, GNorm = 0.6690, lr_0 = 8.3731e-04
Loss = 5.3550e-02, PNorm = 111.3647, GNorm = 0.6222, lr_0 = 8.3674e-04
Loss = 5.4059e-02, PNorm = 111.4280, GNorm = 0.4228, lr_0 = 8.3616e-04
Loss = 6.2569e-02, PNorm = 111.5011, GNorm = 0.3367, lr_0 = 8.3559e-04
Loss = 5.0529e-02, PNorm = 111.5711, GNorm = 0.5941, lr_0 = 8.3502e-04
Loss = 5.6263e-02, PNorm = 111.6513, GNorm = 0.4541, lr_0 = 8.3445e-04
Loss = 5.5128e-02, PNorm = 111.7099, GNorm = 0.5817, lr_0 = 8.3388e-04
Loss = 4.9316e-02, PNorm = 111.7858, GNorm = 0.4873, lr_0 = 8.3330e-04
Loss = 5.0734e-02, PNorm = 111.8397, GNorm = 0.6602, lr_0 = 8.3273e-04
Loss = 5.0656e-02, PNorm = 111.9091, GNorm = 0.4639, lr_0 = 8.3216e-04
Loss = 4.9085e-02, PNorm = 111.9755, GNorm = 0.3387, lr_0 = 8.3159e-04
Loss = 5.6572e-02, PNorm = 112.0342, GNorm = 0.7208, lr_0 = 8.3102e-04
Loss = 5.9602e-02, PNorm = 112.1075, GNorm = 0.5231, lr_0 = 8.3045e-04
Loss = 5.0831e-02, PNorm = 112.1772, GNorm = 0.3190, lr_0 = 8.2988e-04
Loss = 5.7654e-02, PNorm = 112.2513, GNorm = 0.7264, lr_0 = 8.2932e-04
Loss = 5.6607e-02, PNorm = 112.3235, GNorm = 0.4146, lr_0 = 8.2875e-04
Loss = 5.4643e-02, PNorm = 112.3847, GNorm = 0.7334, lr_0 = 8.2818e-04
Loss = 5.5923e-02, PNorm = 112.4536, GNorm = 0.5188, lr_0 = 8.2761e-04
Loss = 6.5877e-02, PNorm = 112.5214, GNorm = 0.3520, lr_0 = 8.2705e-04
Loss = 5.9489e-02, PNorm = 112.5983, GNorm = 0.3677, lr_0 = 8.2648e-04
Loss = 5.9259e-02, PNorm = 112.6785, GNorm = 0.7369, lr_0 = 8.2591e-04
Loss = 5.3632e-02, PNorm = 112.7561, GNorm = 0.3828, lr_0 = 8.2535e-04
Loss = 6.2020e-02, PNorm = 112.8307, GNorm = 0.3926, lr_0 = 8.2478e-04
Loss = 5.6127e-02, PNorm = 112.9038, GNorm = 0.5137, lr_0 = 8.2422e-04
Loss = 5.5848e-02, PNorm = 112.9775, GNorm = 0.3227, lr_0 = 8.2365e-04
Loss = 5.3022e-02, PNorm = 113.0498, GNorm = 0.5370, lr_0 = 8.2309e-04
Loss = 5.3025e-02, PNorm = 113.1254, GNorm = 0.3945, lr_0 = 8.2252e-04
Loss = 6.0725e-02, PNorm = 113.2011, GNorm = 0.5816, lr_0 = 8.2196e-04
Loss = 5.7409e-02, PNorm = 113.2744, GNorm = 0.3610, lr_0 = 8.2140e-04
Loss = 6.1473e-02, PNorm = 113.3548, GNorm = 0.6989, lr_0 = 8.2084e-04
Loss = 7.0152e-02, PNorm = 113.4300, GNorm = 0.4830, lr_0 = 8.2027e-04
Loss = 6.1409e-02, PNorm = 113.5166, GNorm = 0.3139, lr_0 = 8.1971e-04
Loss = 5.5476e-02, PNorm = 113.5951, GNorm = 0.3531, lr_0 = 8.1915e-04
Loss = 5.5154e-02, PNorm = 113.6838, GNorm = 0.6933, lr_0 = 8.1859e-04
Loss = 5.9763e-02, PNorm = 113.7576, GNorm = 0.4258, lr_0 = 8.1803e-04
Loss = 5.6845e-02, PNorm = 113.8336, GNorm = 0.6238, lr_0 = 8.1747e-04
Loss = 5.1952e-02, PNorm = 113.9120, GNorm = 0.3661, lr_0 = 8.1691e-04
Loss = 6.2342e-02, PNorm = 113.9936, GNorm = 0.7353, lr_0 = 8.1635e-04
Loss = 5.5195e-02, PNorm = 114.0770, GNorm = 0.3791, lr_0 = 8.1579e-04
Loss = 5.3254e-02, PNorm = 114.1531, GNorm = 0.3726, lr_0 = 8.1523e-04
Loss = 5.6874e-02, PNorm = 114.2280, GNorm = 0.6379, lr_0 = 8.1467e-04
Loss = 5.7331e-02, PNorm = 114.3044, GNorm = 0.5088, lr_0 = 8.1411e-04
Loss = 5.5503e-02, PNorm = 114.3748, GNorm = 0.7138, lr_0 = 8.1355e-04
Loss = 6.4338e-02, PNorm = 114.4517, GNorm = 0.4404, lr_0 = 8.1300e-04
Loss = 6.7587e-02, PNorm = 114.5308, GNorm = 0.4850, lr_0 = 8.1244e-04
Loss = 6.6305e-02, PNorm = 114.6155, GNorm = 0.2979, lr_0 = 8.1188e-04
Loss = 5.8650e-02, PNorm = 114.7016, GNorm = 0.6749, lr_0 = 8.1133e-04
Loss = 6.1235e-02, PNorm = 114.7725, GNorm = 0.5487, lr_0 = 8.1077e-04
Loss = 6.3555e-02, PNorm = 114.8611, GNorm = 0.8103, lr_0 = 8.1022e-04
Loss = 6.5055e-02, PNorm = 114.9473, GNorm = 0.4079, lr_0 = 8.0966e-04
Loss = 5.8836e-02, PNorm = 115.0367, GNorm = 0.3252, lr_0 = 8.0911e-04
Loss = 5.9547e-02, PNorm = 115.1292, GNorm = 0.7688, lr_0 = 8.0855e-04
Loss = 6.1378e-02, PNorm = 115.2112, GNorm = 0.5329, lr_0 = 8.0800e-04
Loss = 5.7770e-02, PNorm = 115.3067, GNorm = 0.6997, lr_0 = 8.0745e-04
Loss = 6.4977e-02, PNorm = 115.4014, GNorm = 0.4511, lr_0 = 8.0689e-04
Loss = 7.2786e-02, PNorm = 115.5016, GNorm = 0.5005, lr_0 = 8.0634e-04
Loss = 6.1247e-02, PNorm = 115.6033, GNorm = 0.4753, lr_0 = 8.0579e-04
Loss = 7.3492e-02, PNorm = 115.6977, GNorm = 0.7669, lr_0 = 8.0523e-04
Loss = 6.8776e-02, PNorm = 115.7933, GNorm = 0.3133, lr_0 = 8.0468e-04
Loss = 6.6427e-02, PNorm = 115.8907, GNorm = 0.4691, lr_0 = 8.0413e-04
Loss = 6.2609e-02, PNorm = 115.9807, GNorm = 0.8420, lr_0 = 8.0358e-04
Loss = 6.2516e-02, PNorm = 116.0720, GNorm = 0.3266, lr_0 = 8.0303e-04
Loss = 5.6043e-02, PNorm = 116.1587, GNorm = 0.5607, lr_0 = 8.0248e-04
Loss = 5.7885e-02, PNorm = 116.2411, GNorm = 0.5546, lr_0 = 8.0193e-04
Loss = 5.3616e-02, PNorm = 116.3230, GNorm = 0.3366, lr_0 = 8.0138e-04
Loss = 5.8204e-02, PNorm = 116.4056, GNorm = 0.4184, lr_0 = 8.0083e-04
Loss = 6.7080e-02, PNorm = 116.4884, GNorm = 0.6422, lr_0 = 8.0028e-04
Loss = 6.5764e-02, PNorm = 116.5820, GNorm = 0.6844, lr_0 = 7.9974e-04
Loss = 6.7302e-02, PNorm = 116.6731, GNorm = 0.4499, lr_0 = 7.9919e-04
Loss = 6.1329e-02, PNorm = 116.7721, GNorm = 0.4487, lr_0 = 7.9864e-04
Loss = 6.2570e-02, PNorm = 116.8602, GNorm = 0.5621, lr_0 = 7.9809e-04
Loss = 5.5016e-02, PNorm = 116.9531, GNorm = 0.8122, lr_0 = 7.9755e-04
Loss = 6.5611e-02, PNorm = 117.0362, GNorm = 0.6419, lr_0 = 7.9700e-04
Loss = 6.8682e-02, PNorm = 117.1256, GNorm = 0.4255, lr_0 = 7.9645e-04
Loss = 5.8575e-02, PNorm = 117.2149, GNorm = 0.4770, lr_0 = 7.9591e-04
Loss = 6.0751e-02, PNorm = 117.3131, GNorm = 0.5818, lr_0 = 7.9536e-04
Loss = 7.0655e-02, PNorm = 117.4106, GNorm = 0.3074, lr_0 = 7.9482e-04
Loss = 6.5165e-02, PNorm = 117.5167, GNorm = 0.3759, lr_0 = 7.9427e-04
Loss = 5.9767e-02, PNorm = 117.6109, GNorm = 0.6069, lr_0 = 7.9373e-04
Loss = 5.5276e-02, PNorm = 117.6986, GNorm = 0.7542, lr_0 = 7.9319e-04
Loss = 6.2834e-02, PNorm = 117.7797, GNorm = 1.0030, lr_0 = 7.9264e-04
Loss = 5.9655e-02, PNorm = 117.8683, GNorm = 0.2594, lr_0 = 7.9210e-04
Loss = 6.0026e-02, PNorm = 117.9568, GNorm = 0.3842, lr_0 = 7.9156e-04
Loss = 6.9994e-02, PNorm = 118.0534, GNorm = 1.1349, lr_0 = 7.9101e-04
Loss = 5.5255e-02, PNorm = 118.1409, GNorm = 0.3877, lr_0 = 7.9047e-04
Loss = 6.5924e-02, PNorm = 118.2266, GNorm = 0.8182, lr_0 = 7.8993e-04
Loss = 5.8697e-02, PNorm = 118.3152, GNorm = 0.4548, lr_0 = 7.8939e-04
Loss = 6.2755e-02, PNorm = 118.4051, GNorm = 0.4577, lr_0 = 7.8885e-04
Loss = 6.9618e-02, PNorm = 118.5011, GNorm = 0.5309, lr_0 = 7.8831e-04
Loss = 6.0565e-02, PNorm = 118.5881, GNorm = 0.3879, lr_0 = 7.8777e-04
Loss = 6.5317e-02, PNorm = 118.6809, GNorm = 0.4145, lr_0 = 7.8723e-04
Loss = 6.3214e-02, PNorm = 118.7644, GNorm = 0.4273, lr_0 = 7.8669e-04
Loss = 6.4553e-02, PNorm = 118.8626, GNorm = 0.8351, lr_0 = 7.8615e-04
Loss = 7.1423e-02, PNorm = 118.9546, GNorm = 0.9113, lr_0 = 7.8561e-04
Loss = 6.4772e-02, PNorm = 119.0568, GNorm = 0.3306, lr_0 = 7.8507e-04
Loss = 6.5093e-02, PNorm = 119.1486, GNorm = 1.1707, lr_0 = 7.8454e-04
Loss = 5.7945e-02, PNorm = 119.2401, GNorm = 0.7201, lr_0 = 7.8400e-04
Loss = 6.7398e-02, PNorm = 119.3340, GNorm = 0.7583, lr_0 = 7.8346e-04
Loss = 6.1907e-02, PNorm = 119.4319, GNorm = 0.3387, lr_0 = 7.8293e-04
Loss = 7.3726e-02, PNorm = 119.5280, GNorm = 0.6727, lr_0 = 7.8239e-04
Loss = 6.7606e-02, PNorm = 119.6286, GNorm = 0.3211, lr_0 = 7.8185e-04
Loss = 6.5562e-02, PNorm = 119.7209, GNorm = 0.6517, lr_0 = 7.8132e-04
Validation mae = 0.291377
Epoch 5
Loss = 4.4642e-02, PNorm = 119.8075, GNorm = 0.2686, lr_0 = 7.8078e-04
Loss = 4.3812e-02, PNorm = 119.8871, GNorm = 0.5244, lr_0 = 7.8025e-04
Loss = 3.7891e-02, PNorm = 119.9565, GNorm = 0.7479, lr_0 = 7.7971e-04
Loss = 4.7856e-02, PNorm = 120.0196, GNorm = 0.3504, lr_0 = 7.7918e-04
Loss = 4.4758e-02, PNorm = 120.0793, GNorm = 0.6408, lr_0 = 7.7864e-04
Loss = 4.6758e-02, PNorm = 120.1453, GNorm = 0.7726, lr_0 = 7.7811e-04
Loss = 4.5726e-02, PNorm = 120.1994, GNorm = 1.1154, lr_0 = 7.7758e-04
Loss = 3.9374e-02, PNorm = 120.2560, GNorm = 0.6208, lr_0 = 7.7705e-04
Loss = 4.4167e-02, PNorm = 120.3123, GNorm = 0.2276, lr_0 = 7.7651e-04
Loss = 4.2349e-02, PNorm = 120.3814, GNorm = 0.5474, lr_0 = 7.7598e-04
Loss = 4.7758e-02, PNorm = 120.4423, GNorm = 0.3135, lr_0 = 7.7545e-04
Loss = 4.0715e-02, PNorm = 120.5022, GNorm = 0.7470, lr_0 = 7.7492e-04
Loss = 3.5491e-02, PNorm = 120.5605, GNorm = 0.7255, lr_0 = 7.7439e-04
Loss = 3.8995e-02, PNorm = 120.6243, GNorm = 0.5763, lr_0 = 7.7386e-04
Loss = 4.1994e-02, PNorm = 120.6744, GNorm = 0.6056, lr_0 = 7.7333e-04
Loss = 4.4848e-02, PNorm = 120.7360, GNorm = 0.3091, lr_0 = 7.7280e-04
Loss = 3.9414e-02, PNorm = 120.7921, GNorm = 0.5234, lr_0 = 7.7227e-04
Loss = 3.8472e-02, PNorm = 120.8520, GNorm = 1.0421, lr_0 = 7.7174e-04
Loss = 4.3122e-02, PNorm = 120.9124, GNorm = 0.5358, lr_0 = 7.7121e-04
Loss = 4.7161e-02, PNorm = 120.9685, GNorm = 0.5556, lr_0 = 7.7068e-04
Loss = 4.1156e-02, PNorm = 121.0389, GNorm = 0.4135, lr_0 = 7.7015e-04
Loss = 4.1202e-02, PNorm = 121.1039, GNorm = 0.4196, lr_0 = 7.6963e-04
Loss = 4.5190e-02, PNorm = 121.1689, GNorm = 0.7882, lr_0 = 7.6910e-04
Loss = 3.8801e-02, PNorm = 121.2295, GNorm = 0.2750, lr_0 = 7.6857e-04
Loss = 3.8526e-02, PNorm = 121.2846, GNorm = 0.2557, lr_0 = 7.6805e-04
Loss = 4.5782e-02, PNorm = 121.3437, GNorm = 0.2113, lr_0 = 7.6752e-04
Loss = 3.9037e-02, PNorm = 121.4085, GNorm = 0.4792, lr_0 = 7.6699e-04
Loss = 4.3644e-02, PNorm = 121.4648, GNorm = 0.3676, lr_0 = 7.6647e-04
Loss = 5.1360e-02, PNorm = 121.5256, GNorm = 0.3789, lr_0 = 7.6594e-04
Loss = 4.0343e-02, PNorm = 121.5959, GNorm = 0.4777, lr_0 = 7.6542e-04
Loss = 3.8627e-02, PNorm = 121.6673, GNorm = 0.3887, lr_0 = 7.6489e-04
Loss = 4.2387e-02, PNorm = 121.7329, GNorm = 0.2929, lr_0 = 7.6437e-04
Loss = 3.6189e-02, PNorm = 121.7957, GNorm = 0.4397, lr_0 = 7.6385e-04
Loss = 4.3819e-02, PNorm = 121.8498, GNorm = 0.4986, lr_0 = 7.6332e-04
Loss = 4.2867e-02, PNorm = 121.9131, GNorm = 0.5504, lr_0 = 7.6280e-04
Loss = 4.2871e-02, PNorm = 121.9879, GNorm = 0.4385, lr_0 = 7.6228e-04
Loss = 3.8280e-02, PNorm = 122.0546, GNorm = 0.5241, lr_0 = 7.6176e-04
Loss = 4.2385e-02, PNorm = 122.1254, GNorm = 0.5522, lr_0 = 7.6123e-04
Loss = 4.5154e-02, PNorm = 122.1932, GNorm = 0.5466, lr_0 = 7.6071e-04
Loss = 3.6568e-02, PNorm = 122.2576, GNorm = 0.4104, lr_0 = 7.6019e-04
Loss = 3.9473e-02, PNorm = 122.3189, GNorm = 0.3844, lr_0 = 7.5967e-04
Loss = 4.2244e-02, PNorm = 122.3785, GNorm = 0.6331, lr_0 = 7.5915e-04
Loss = 5.3433e-02, PNorm = 122.4423, GNorm = 0.3313, lr_0 = 7.5863e-04
Loss = 4.3033e-02, PNorm = 122.5134, GNorm = 0.4173, lr_0 = 7.5811e-04
Loss = 3.9630e-02, PNorm = 122.5821, GNorm = 0.6570, lr_0 = 7.5759e-04
Loss = 4.8472e-02, PNorm = 122.6636, GNorm = 0.4102, lr_0 = 7.5707e-04
Loss = 3.7463e-02, PNorm = 122.7376, GNorm = 0.7614, lr_0 = 7.5655e-04
Loss = 4.0516e-02, PNorm = 122.8093, GNorm = 0.3754, lr_0 = 7.5603e-04
Loss = 3.7784e-02, PNorm = 122.8742, GNorm = 0.2241, lr_0 = 7.5552e-04
Loss = 3.7694e-02, PNorm = 122.9430, GNorm = 0.2983, lr_0 = 7.5500e-04
Loss = 3.8102e-02, PNorm = 123.0024, GNorm = 0.3826, lr_0 = 7.5448e-04
Loss = 4.4518e-02, PNorm = 123.0653, GNorm = 0.3205, lr_0 = 7.5397e-04
Loss = 3.6550e-02, PNorm = 123.1299, GNorm = 0.2321, lr_0 = 7.5345e-04
Loss = 3.9898e-02, PNorm = 123.1927, GNorm = 0.6844, lr_0 = 7.5293e-04
Loss = 4.5894e-02, PNorm = 123.2572, GNorm = 0.6915, lr_0 = 7.5242e-04
Loss = 4.2483e-02, PNorm = 123.3229, GNorm = 0.2213, lr_0 = 7.5190e-04
Loss = 4.7828e-02, PNorm = 123.3908, GNorm = 0.2841, lr_0 = 7.5139e-04
Loss = 4.3396e-02, PNorm = 123.4612, GNorm = 0.7359, lr_0 = 7.5087e-04
Loss = 3.6300e-02, PNorm = 123.5241, GNorm = 0.5306, lr_0 = 7.5036e-04
Loss = 3.6212e-02, PNorm = 123.5902, GNorm = 0.2719, lr_0 = 7.4984e-04
Loss = 4.5683e-02, PNorm = 123.6673, GNorm = 0.7818, lr_0 = 7.4933e-04
Loss = 4.3610e-02, PNorm = 123.7486, GNorm = 0.2803, lr_0 = 7.4882e-04
Loss = 4.7234e-02, PNorm = 123.8307, GNorm = 0.3231, lr_0 = 7.4830e-04
Loss = 4.4567e-02, PNorm = 123.9046, GNorm = 0.4176, lr_0 = 7.4779e-04
Loss = 5.9268e-02, PNorm = 123.9824, GNorm = 0.7334, lr_0 = 7.4728e-04
Loss = 4.5715e-02, PNorm = 124.0617, GNorm = 0.4202, lr_0 = 7.4677e-04
Loss = 5.2616e-02, PNorm = 124.1367, GNorm = 0.4848, lr_0 = 7.4625e-04
Loss = 4.1725e-02, PNorm = 124.2119, GNorm = 0.2656, lr_0 = 7.4574e-04
Loss = 4.4031e-02, PNorm = 124.2905, GNorm = 0.6294, lr_0 = 7.4523e-04
Loss = 4.5542e-02, PNorm = 124.3630, GNorm = 0.8029, lr_0 = 7.4472e-04
Loss = 3.7825e-02, PNorm = 124.4374, GNorm = 0.2848, lr_0 = 7.4421e-04
Loss = 4.7288e-02, PNorm = 124.5077, GNorm = 0.4713, lr_0 = 7.4370e-04
Loss = 3.8804e-02, PNorm = 124.5788, GNorm = 0.3242, lr_0 = 7.4319e-04
Loss = 3.8341e-02, PNorm = 124.6438, GNorm = 0.5533, lr_0 = 7.4268e-04
Loss = 4.6711e-02, PNorm = 124.7148, GNorm = 0.3775, lr_0 = 7.4217e-04
Loss = 4.3841e-02, PNorm = 124.7784, GNorm = 0.2750, lr_0 = 7.4167e-04
Loss = 4.1371e-02, PNorm = 124.8499, GNorm = 0.2976, lr_0 = 7.4116e-04
Loss = 4.1563e-02, PNorm = 124.9288, GNorm = 0.4266, lr_0 = 7.4065e-04
Loss = 4.9706e-02, PNorm = 124.9991, GNorm = 0.3715, lr_0 = 7.4014e-04
Loss = 4.6625e-02, PNorm = 125.0713, GNorm = 0.5040, lr_0 = 7.3964e-04
Loss = 3.8767e-02, PNorm = 125.1527, GNorm = 0.3912, lr_0 = 7.3913e-04
Loss = 5.2290e-02, PNorm = 125.2360, GNorm = 0.5267, lr_0 = 7.3862e-04
Loss = 4.7826e-02, PNorm = 125.3197, GNorm = 0.3024, lr_0 = 7.3812e-04
Loss = 4.2895e-02, PNorm = 125.3963, GNorm = 0.4290, lr_0 = 7.3761e-04
Loss = 4.3643e-02, PNorm = 125.4689, GNorm = 0.7576, lr_0 = 7.3711e-04
Loss = 4.2519e-02, PNorm = 125.5422, GNorm = 0.2953, lr_0 = 7.3660e-04
Loss = 3.2522e-02, PNorm = 125.6132, GNorm = 0.2996, lr_0 = 7.3610e-04
Loss = 4.4955e-02, PNorm = 125.6858, GNorm = 0.6528, lr_0 = 7.3559e-04
Loss = 4.2897e-02, PNorm = 125.7587, GNorm = 0.3882, lr_0 = 7.3509e-04
Loss = 4.0412e-02, PNorm = 125.8369, GNorm = 0.2702, lr_0 = 7.3458e-04
Loss = 5.4978e-02, PNorm = 125.9143, GNorm = 0.6678, lr_0 = 7.3408e-04
Loss = 4.3535e-02, PNorm = 125.9929, GNorm = 0.7242, lr_0 = 7.3358e-04
Loss = 4.5125e-02, PNorm = 126.0690, GNorm = 0.3451, lr_0 = 7.3308e-04
Loss = 4.9923e-02, PNorm = 126.1459, GNorm = 0.2946, lr_0 = 7.3257e-04
Loss = 4.5161e-02, PNorm = 126.2262, GNorm = 0.4609, lr_0 = 7.3207e-04
Loss = 4.8458e-02, PNorm = 126.3053, GNorm = 0.7138, lr_0 = 7.3157e-04
Loss = 4.6600e-02, PNorm = 126.3800, GNorm = 0.5238, lr_0 = 7.3107e-04
Loss = 5.3490e-02, PNorm = 126.4581, GNorm = 0.3426, lr_0 = 7.3057e-04
Loss = 4.7586e-02, PNorm = 126.5475, GNorm = 0.2669, lr_0 = 7.3007e-04
Loss = 4.7883e-02, PNorm = 126.6345, GNorm = 0.6684, lr_0 = 7.2957e-04
Loss = 4.1905e-02, PNorm = 126.7252, GNorm = 0.4176, lr_0 = 7.2907e-04
Loss = 4.7099e-02, PNorm = 126.8114, GNorm = 0.2917, lr_0 = 7.2857e-04
Loss = 4.6645e-02, PNorm = 126.8961, GNorm = 0.5479, lr_0 = 7.2807e-04
Loss = 4.2640e-02, PNorm = 126.9796, GNorm = 0.3695, lr_0 = 7.2757e-04
Loss = 4.4687e-02, PNorm = 127.0558, GNorm = 0.4616, lr_0 = 7.2707e-04
Loss = 4.5073e-02, PNorm = 127.1366, GNorm = 0.5878, lr_0 = 7.2657e-04
Loss = 5.3698e-02, PNorm = 127.2091, GNorm = 0.5571, lr_0 = 7.2608e-04
Loss = 4.5828e-02, PNorm = 127.2864, GNorm = 0.4111, lr_0 = 7.2558e-04
Loss = 5.6675e-02, PNorm = 127.3626, GNorm = 0.3439, lr_0 = 7.2508e-04
Loss = 4.7236e-02, PNorm = 127.4517, GNorm = 0.4307, lr_0 = 7.2458e-04
Loss = 4.7304e-02, PNorm = 127.5344, GNorm = 0.3008, lr_0 = 7.2409e-04
Loss = 5.3595e-02, PNorm = 127.6138, GNorm = 0.3069, lr_0 = 7.2359e-04
Loss = 4.8950e-02, PNorm = 127.6992, GNorm = 0.2504, lr_0 = 7.2310e-04
Loss = 4.4737e-02, PNorm = 127.7835, GNorm = 0.6227, lr_0 = 7.2260e-04
Loss = 4.9594e-02, PNorm = 127.8591, GNorm = 0.5856, lr_0 = 7.2211e-04
Loss = 4.9755e-02, PNorm = 127.9457, GNorm = 0.4612, lr_0 = 7.2161e-04
Loss = 5.4808e-02, PNorm = 128.0253, GNorm = 0.4730, lr_0 = 7.2112e-04
Loss = 4.9454e-02, PNorm = 128.1216, GNorm = 0.3709, lr_0 = 7.2062e-04
Loss = 4.7048e-02, PNorm = 128.2056, GNorm = 0.7975, lr_0 = 7.2013e-04
Loss = 3.9034e-02, PNorm = 128.2921, GNorm = 0.8010, lr_0 = 7.1964e-04
Validation mae = 0.289039
Epoch 6
Loss = 3.6548e-02, PNorm = 128.3557, GNorm = 0.2223, lr_0 = 7.1914e-04
Loss = 3.3116e-02, PNorm = 128.4196, GNorm = 0.5304, lr_0 = 7.1865e-04
Loss = 3.3082e-02, PNorm = 128.4747, GNorm = 0.2556, lr_0 = 7.1816e-04
Loss = 2.9027e-02, PNorm = 128.5224, GNorm = 0.2082, lr_0 = 7.1767e-04
Loss = 3.7540e-02, PNorm = 128.5676, GNorm = 0.5974, lr_0 = 7.1717e-04
Loss = 3.7441e-02, PNorm = 128.6133, GNorm = 0.7937, lr_0 = 7.1668e-04
Loss = 3.0354e-02, PNorm = 128.6644, GNorm = 0.4826, lr_0 = 7.1619e-04
Loss = 3.1390e-02, PNorm = 128.7081, GNorm = 0.6572, lr_0 = 7.1570e-04
Loss = 3.8216e-02, PNorm = 128.7576, GNorm = 0.2743, lr_0 = 7.1521e-04
Loss = 3.9157e-02, PNorm = 128.8145, GNorm = 0.4236, lr_0 = 7.1472e-04
Loss = 4.0927e-02, PNorm = 128.8722, GNorm = 0.2187, lr_0 = 7.1423e-04
Loss = 3.6238e-02, PNorm = 128.9217, GNorm = 0.9944, lr_0 = 7.1374e-04
Loss = 3.4020e-02, PNorm = 128.9722, GNorm = 0.6081, lr_0 = 7.1325e-04
Loss = 3.0984e-02, PNorm = 129.0237, GNorm = 0.2300, lr_0 = 7.1277e-04
Loss = 3.6272e-02, PNorm = 129.0766, GNorm = 0.4323, lr_0 = 7.1228e-04
Loss = 3.4117e-02, PNorm = 129.1319, GNorm = 0.2331, lr_0 = 7.1179e-04
Loss = 3.1530e-02, PNorm = 129.1882, GNorm = 0.3861, lr_0 = 7.1130e-04
Loss = 3.7332e-02, PNorm = 129.2432, GNorm = 0.7555, lr_0 = 7.1081e-04
Loss = 3.4606e-02, PNorm = 129.3053, GNorm = 0.7675, lr_0 = 7.1033e-04
Loss = 3.7416e-02, PNorm = 129.3578, GNorm = 0.2474, lr_0 = 7.0984e-04
Loss = 3.4330e-02, PNorm = 129.4120, GNorm = 0.2562, lr_0 = 7.0935e-04
Loss = 3.7283e-02, PNorm = 129.4679, GNorm = 0.3178, lr_0 = 7.0887e-04
Loss = 3.2888e-02, PNorm = 129.5199, GNorm = 0.3931, lr_0 = 7.0838e-04
Loss = 3.6350e-02, PNorm = 129.5762, GNorm = 0.3216, lr_0 = 7.0790e-04
Loss = 2.9601e-02, PNorm = 129.6337, GNorm = 0.4917, lr_0 = 7.0741e-04
Loss = 3.7786e-02, PNorm = 129.6863, GNorm = 0.4763, lr_0 = 7.0693e-04
Loss = 3.3276e-02, PNorm = 129.7451, GNorm = 0.4499, lr_0 = 7.0644e-04
Loss = 3.2485e-02, PNorm = 129.8012, GNorm = 0.1857, lr_0 = 7.0596e-04
Loss = 3.5419e-02, PNorm = 129.8615, GNorm = 0.2518, lr_0 = 7.0548e-04
Loss = 2.8231e-02, PNorm = 129.9168, GNorm = 0.2010, lr_0 = 7.0499e-04
Loss = 3.4271e-02, PNorm = 129.9737, GNorm = 0.2960, lr_0 = 7.0451e-04
Loss = 3.2453e-02, PNorm = 130.0243, GNorm = 0.2797, lr_0 = 7.0403e-04
Loss = 3.5734e-02, PNorm = 130.0844, GNorm = 0.3175, lr_0 = 7.0354e-04
Loss = 2.8971e-02, PNorm = 130.1396, GNorm = 0.5363, lr_0 = 7.0306e-04
Loss = 2.7631e-02, PNorm = 130.1958, GNorm = 0.3019, lr_0 = 7.0258e-04
Loss = 3.2382e-02, PNorm = 130.2417, GNorm = 0.2671, lr_0 = 7.0210e-04
Loss = 2.8125e-02, PNorm = 130.2956, GNorm = 0.2491, lr_0 = 7.0162e-04
Loss = 3.5157e-02, PNorm = 130.3478, GNorm = 0.4268, lr_0 = 7.0114e-04
Loss = 3.1866e-02, PNorm = 130.4066, GNorm = 0.4388, lr_0 = 7.0066e-04
Loss = 3.2980e-02, PNorm = 130.4664, GNorm = 0.2699, lr_0 = 7.0018e-04
Loss = 2.6888e-02, PNorm = 130.5118, GNorm = 0.5000, lr_0 = 6.9970e-04
Loss = 2.7970e-02, PNorm = 130.5602, GNorm = 0.5709, lr_0 = 6.9922e-04
Loss = 3.4222e-02, PNorm = 130.6109, GNorm = 0.4535, lr_0 = 6.9874e-04
Loss = 3.9285e-02, PNorm = 130.6685, GNorm = 0.3291, lr_0 = 6.9826e-04
Loss = 2.9031e-02, PNorm = 130.7264, GNorm = 0.3380, lr_0 = 6.9778e-04
Loss = 3.0610e-02, PNorm = 130.7805, GNorm = 0.2944, lr_0 = 6.9730e-04
Loss = 3.5109e-02, PNorm = 130.8272, GNorm = 0.5633, lr_0 = 6.9683e-04
Loss = 3.3238e-02, PNorm = 130.8853, GNorm = 0.4181, lr_0 = 6.9635e-04
Loss = 3.8220e-02, PNorm = 130.9513, GNorm = 0.4811, lr_0 = 6.9587e-04
Loss = 3.0354e-02, PNorm = 131.0083, GNorm = 0.5450, lr_0 = 6.9540e-04
Loss = 3.3620e-02, PNorm = 131.0703, GNorm = 0.2761, lr_0 = 6.9492e-04
Loss = 3.2579e-02, PNorm = 131.1265, GNorm = 0.8507, lr_0 = 6.9444e-04
Loss = 3.6926e-02, PNorm = 131.1849, GNorm = 0.4405, lr_0 = 6.9397e-04
Loss = 3.2968e-02, PNorm = 131.2421, GNorm = 0.4950, lr_0 = 6.9349e-04
Loss = 2.9915e-02, PNorm = 131.3042, GNorm = 0.3836, lr_0 = 6.9302e-04
Loss = 3.2132e-02, PNorm = 131.3654, GNorm = 0.3023, lr_0 = 6.9254e-04
Loss = 3.3955e-02, PNorm = 131.4195, GNorm = 0.5640, lr_0 = 6.9207e-04
Loss = 3.5075e-02, PNorm = 131.4871, GNorm = 0.6847, lr_0 = 6.9159e-04
Loss = 3.1763e-02, PNorm = 131.5559, GNorm = 0.6656, lr_0 = 6.9112e-04
Loss = 2.9375e-02, PNorm = 131.6301, GNorm = 0.4561, lr_0 = 6.9065e-04
Loss = 3.2732e-02, PNorm = 131.6957, GNorm = 0.2670, lr_0 = 6.9017e-04
Loss = 4.1462e-02, PNorm = 131.7563, GNorm = 0.6541, lr_0 = 6.8970e-04
Loss = 3.1145e-02, PNorm = 131.8238, GNorm = 0.5634, lr_0 = 6.8923e-04
Loss = 3.5853e-02, PNorm = 131.8864, GNorm = 0.7615, lr_0 = 6.8876e-04
Loss = 2.9685e-02, PNorm = 131.9514, GNorm = 0.5534, lr_0 = 6.8828e-04
Loss = 3.5355e-02, PNorm = 132.0185, GNorm = 0.7094, lr_0 = 6.8781e-04
Loss = 2.9155e-02, PNorm = 132.0847, GNorm = 0.2170, lr_0 = 6.8734e-04
Loss = 3.2247e-02, PNorm = 132.1483, GNorm = 0.3604, lr_0 = 6.8687e-04
Loss = 3.0699e-02, PNorm = 132.2056, GNorm = 0.3326, lr_0 = 6.8640e-04
Loss = 3.4573e-02, PNorm = 132.2637, GNorm = 1.1134, lr_0 = 6.8593e-04
Loss = 3.6369e-02, PNorm = 132.3257, GNorm = 0.3375, lr_0 = 6.8546e-04
Loss = 3.7524e-02, PNorm = 132.3975, GNorm = 0.2458, lr_0 = 6.8499e-04
Loss = 3.0890e-02, PNorm = 132.4680, GNorm = 0.4142, lr_0 = 6.8452e-04
Loss = 2.8667e-02, PNorm = 132.5256, GNorm = 0.3972, lr_0 = 6.8405e-04
Loss = 3.7647e-02, PNorm = 132.5783, GNorm = 0.7594, lr_0 = 6.8358e-04
Loss = 3.1825e-02, PNorm = 132.6373, GNorm = 0.7208, lr_0 = 6.8312e-04
Loss = 3.2662e-02, PNorm = 132.7019, GNorm = 0.9068, lr_0 = 6.8265e-04
Loss = 3.3068e-02, PNorm = 132.7617, GNorm = 0.3006, lr_0 = 6.8218e-04
Loss = 3.1597e-02, PNorm = 132.8253, GNorm = 0.6659, lr_0 = 6.8171e-04
Loss = 3.1672e-02, PNorm = 132.8862, GNorm = 0.3818, lr_0 = 6.8125e-04
Loss = 3.2927e-02, PNorm = 132.9473, GNorm = 0.3545, lr_0 = 6.8078e-04
Loss = 3.0572e-02, PNorm = 133.0122, GNorm = 0.3836, lr_0 = 6.8031e-04
Loss = 3.2900e-02, PNorm = 133.0706, GNorm = 0.3170, lr_0 = 6.7985e-04
Loss = 3.3681e-02, PNorm = 133.1299, GNorm = 0.6577, lr_0 = 6.7938e-04
Loss = 3.3935e-02, PNorm = 133.1956, GNorm = 0.2524, lr_0 = 6.7892e-04
Loss = 3.6079e-02, PNorm = 133.2590, GNorm = 0.3851, lr_0 = 6.7845e-04
Loss = 3.4021e-02, PNorm = 133.3286, GNorm = 0.3309, lr_0 = 6.7799e-04
Loss = 3.0325e-02, PNorm = 133.3991, GNorm = 0.8773, lr_0 = 6.7752e-04
Loss = 3.5602e-02, PNorm = 133.4679, GNorm = 0.5195, lr_0 = 6.7706e-04
Loss = 3.1797e-02, PNorm = 133.5388, GNorm = 0.3936, lr_0 = 6.7659e-04
Loss = 3.4214e-02, PNorm = 133.6011, GNorm = 0.7250, lr_0 = 6.7613e-04
Loss = 3.2731e-02, PNorm = 133.6588, GNorm = 0.9863, lr_0 = 6.7567e-04
Loss = 3.6980e-02, PNorm = 133.7228, GNorm = 0.2904, lr_0 = 6.7520e-04
Loss = 3.5422e-02, PNorm = 133.7907, GNorm = 0.7687, lr_0 = 6.7474e-04
Loss = 3.8841e-02, PNorm = 133.8649, GNorm = 0.2349, lr_0 = 6.7428e-04
Loss = 3.8018e-02, PNorm = 133.9378, GNorm = 0.7406, lr_0 = 6.7382e-04
Loss = 3.5875e-02, PNorm = 134.0185, GNorm = 0.5634, lr_0 = 6.7335e-04
Loss = 4.3000e-02, PNorm = 134.0917, GNorm = 0.6429, lr_0 = 6.7289e-04
Loss = 3.3795e-02, PNorm = 134.1724, GNorm = 0.2426, lr_0 = 6.7243e-04
Loss = 4.0302e-02, PNorm = 134.2467, GNorm = 0.8357, lr_0 = 6.7197e-04
Loss = 3.9259e-02, PNorm = 134.3151, GNorm = 0.5243, lr_0 = 6.7151e-04
Loss = 3.9344e-02, PNorm = 134.3976, GNorm = 0.5133, lr_0 = 6.7105e-04
Loss = 3.2683e-02, PNorm = 134.4755, GNorm = 0.3165, lr_0 = 6.7059e-04
Loss = 3.5616e-02, PNorm = 134.5453, GNorm = 0.3449, lr_0 = 6.7013e-04
Loss = 3.5898e-02, PNorm = 134.6099, GNorm = 0.4091, lr_0 = 6.6967e-04
Loss = 3.9468e-02, PNorm = 134.6809, GNorm = 0.5613, lr_0 = 6.6921e-04
Loss = 4.3719e-02, PNorm = 134.7511, GNorm = 0.6205, lr_0 = 6.6876e-04
Loss = 3.8971e-02, PNorm = 134.8286, GNorm = 0.5055, lr_0 = 6.6830e-04
Loss = 3.7858e-02, PNorm = 134.9064, GNorm = 0.6692, lr_0 = 6.6784e-04
Loss = 4.2766e-02, PNorm = 134.9833, GNorm = 0.4378, lr_0 = 6.6738e-04
Loss = 3.7680e-02, PNorm = 135.0560, GNorm = 0.2376, lr_0 = 6.6693e-04
Loss = 4.5045e-02, PNorm = 135.1280, GNorm = 0.7308, lr_0 = 6.6647e-04
Loss = 3.8152e-02, PNorm = 135.2021, GNorm = 0.2790, lr_0 = 6.6601e-04
Loss = 3.2599e-02, PNorm = 135.2773, GNorm = 0.3883, lr_0 = 6.6556e-04
Loss = 3.8051e-02, PNorm = 135.3453, GNorm = 0.2377, lr_0 = 6.6510e-04
Loss = 3.8122e-02, PNorm = 135.4224, GNorm = 0.9433, lr_0 = 6.6464e-04
Loss = 3.5815e-02, PNorm = 135.4966, GNorm = 0.2237, lr_0 = 6.6419e-04
Loss = 3.1163e-02, PNorm = 135.5701, GNorm = 0.3867, lr_0 = 6.6373e-04
Loss = 3.4547e-02, PNorm = 135.6419, GNorm = 0.3332, lr_0 = 6.6328e-04
Loss = 3.2459e-02, PNorm = 135.7203, GNorm = 0.3038, lr_0 = 6.6282e-04
Validation mae = 0.287028
Epoch 7
Loss = 2.8554e-02, PNorm = 135.7819, GNorm = 0.8324, lr_0 = 6.6237e-04
Loss = 3.0145e-02, PNorm = 135.8370, GNorm = 0.8589, lr_0 = 6.6192e-04
Loss = 2.6781e-02, PNorm = 135.8850, GNorm = 0.3263, lr_0 = 6.6146e-04
Loss = 3.3590e-02, PNorm = 135.9324, GNorm = 0.6871, lr_0 = 6.6101e-04
Loss = 2.8306e-02, PNorm = 135.9839, GNorm = 0.3162, lr_0 = 6.6056e-04
Loss = 2.6220e-02, PNorm = 136.0342, GNorm = 0.4938, lr_0 = 6.6011e-04
Loss = 2.9209e-02, PNorm = 136.0813, GNorm = 0.6317, lr_0 = 6.5965e-04
Loss = 3.1383e-02, PNorm = 136.1331, GNorm = 0.2761, lr_0 = 6.5920e-04
Loss = 2.8026e-02, PNorm = 136.1836, GNorm = 0.6352, lr_0 = 6.5875e-04
Loss = 2.4527e-02, PNorm = 136.2339, GNorm = 0.4023, lr_0 = 6.5830e-04
Loss = 2.4447e-02, PNorm = 136.2767, GNorm = 0.2468, lr_0 = 6.5785e-04
Loss = 2.4733e-02, PNorm = 136.3211, GNorm = 0.3443, lr_0 = 6.5740e-04
Loss = 2.6672e-02, PNorm = 136.3661, GNorm = 0.2917, lr_0 = 6.5695e-04
Loss = 2.8107e-02, PNorm = 136.4139, GNorm = 0.5130, lr_0 = 6.5650e-04
Loss = 2.2837e-02, PNorm = 136.4555, GNorm = 0.5466, lr_0 = 6.5605e-04
Loss = 2.5676e-02, PNorm = 136.4968, GNorm = 0.2534, lr_0 = 6.5560e-04
Loss = 2.7070e-02, PNorm = 136.5409, GNorm = 0.5106, lr_0 = 6.5515e-04
Loss = 2.4949e-02, PNorm = 136.5890, GNorm = 0.6012, lr_0 = 6.5470e-04
Loss = 2.8113e-02, PNorm = 136.6362, GNorm = 1.1602, lr_0 = 6.5425e-04
Loss = 2.6070e-02, PNorm = 136.6870, GNorm = 0.2055, lr_0 = 6.5380e-04
Loss = 2.6945e-02, PNorm = 136.7355, GNorm = 0.5379, lr_0 = 6.5335e-04
Loss = 2.8143e-02, PNorm = 136.7828, GNorm = 0.3944, lr_0 = 6.5291e-04
Loss = 2.6553e-02, PNorm = 136.8275, GNorm = 0.3240, lr_0 = 6.5246e-04
Loss = 2.8312e-02, PNorm = 136.8739, GNorm = 0.6156, lr_0 = 6.5201e-04
Loss = 2.7331e-02, PNorm = 136.9220, GNorm = 0.2872, lr_0 = 6.5157e-04
Loss = 3.0838e-02, PNorm = 136.9706, GNorm = 0.4822, lr_0 = 6.5112e-04
Loss = 2.6682e-02, PNorm = 137.0257, GNorm = 0.2092, lr_0 = 6.5067e-04
Loss = 2.3878e-02, PNorm = 137.0782, GNorm = 0.2390, lr_0 = 6.5023e-04
Loss = 2.8990e-02, PNorm = 137.1334, GNorm = 0.4158, lr_0 = 6.4978e-04
Loss = 2.9851e-02, PNorm = 137.1863, GNorm = 0.5377, lr_0 = 6.4934e-04
Loss = 3.4550e-02, PNorm = 137.2396, GNorm = 0.9892, lr_0 = 6.4889e-04
Loss = 3.0044e-02, PNorm = 137.2918, GNorm = 0.2444, lr_0 = 6.4845e-04
Loss = 2.7047e-02, PNorm = 137.3467, GNorm = 0.2803, lr_0 = 6.4800e-04
Loss = 3.4440e-02, PNorm = 137.4000, GNorm = 0.3345, lr_0 = 6.4756e-04
Loss = 2.5293e-02, PNorm = 137.4598, GNorm = 0.3025, lr_0 = 6.4712e-04
Loss = 2.4874e-02, PNorm = 137.5094, GNorm = 0.5932, lr_0 = 6.4667e-04
Loss = 2.7846e-02, PNorm = 137.5611, GNorm = 0.2543, lr_0 = 6.4623e-04
Loss = 3.0169e-02, PNorm = 137.6121, GNorm = 0.4995, lr_0 = 6.4579e-04
Loss = 2.5268e-02, PNorm = 137.6648, GNorm = 0.4151, lr_0 = 6.4534e-04
Loss = 2.8446e-02, PNorm = 137.7169, GNorm = 0.7163, lr_0 = 6.4490e-04
Loss = 3.1238e-02, PNorm = 137.7774, GNorm = 0.7740, lr_0 = 6.4446e-04
Loss = 2.4590e-02, PNorm = 137.8355, GNorm = 0.4055, lr_0 = 6.4402e-04
Loss = 2.5943e-02, PNorm = 137.8911, GNorm = 0.4759, lr_0 = 6.4358e-04
Loss = 2.8201e-02, PNorm = 137.9377, GNorm = 1.0657, lr_0 = 6.4314e-04
Loss = 2.6694e-02, PNorm = 137.9920, GNorm = 0.7912, lr_0 = 6.4270e-04
Loss = 2.6737e-02, PNorm = 138.0391, GNorm = 0.6350, lr_0 = 6.4226e-04
Loss = 2.6910e-02, PNorm = 138.0916, GNorm = 0.6014, lr_0 = 6.4182e-04
Loss = 2.6131e-02, PNorm = 138.1429, GNorm = 0.3202, lr_0 = 6.4138e-04
Loss = 2.8781e-02, PNorm = 138.1964, GNorm = 0.3810, lr_0 = 6.4094e-04
Loss = 2.4835e-02, PNorm = 138.2437, GNorm = 0.6099, lr_0 = 6.4050e-04
Loss = 3.0648e-02, PNorm = 138.2953, GNorm = 0.4848, lr_0 = 6.4006e-04
Loss = 2.4349e-02, PNorm = 138.3499, GNorm = 0.3493, lr_0 = 6.3962e-04
Loss = 2.5923e-02, PNorm = 138.3958, GNorm = 0.3742, lr_0 = 6.3918e-04
Loss = 2.4688e-02, PNorm = 138.4516, GNorm = 0.2485, lr_0 = 6.3874e-04
Loss = 3.1340e-02, PNorm = 138.5052, GNorm = 0.8549, lr_0 = 6.3831e-04
Loss = 2.8921e-02, PNorm = 138.5660, GNorm = 0.8814, lr_0 = 6.3787e-04
Loss = 2.9431e-02, PNorm = 138.6212, GNorm = 0.7800, lr_0 = 6.3743e-04
Loss = 2.6168e-02, PNorm = 138.6836, GNorm = 0.7132, lr_0 = 6.3700e-04
Loss = 2.4957e-02, PNorm = 138.7344, GNorm = 0.3028, lr_0 = 6.3656e-04
Loss = 2.4782e-02, PNorm = 138.7793, GNorm = 0.7202, lr_0 = 6.3612e-04
Loss = 2.5759e-02, PNorm = 138.8358, GNorm = 0.2396, lr_0 = 6.3569e-04
Loss = 2.6038e-02, PNorm = 138.8901, GNorm = 0.5098, lr_0 = 6.3525e-04
Loss = 2.6196e-02, PNorm = 138.9379, GNorm = 0.3739, lr_0 = 6.3482e-04
Loss = 2.4946e-02, PNorm = 138.9878, GNorm = 0.6876, lr_0 = 6.3438e-04
Loss = 2.7919e-02, PNorm = 139.0381, GNorm = 0.5856, lr_0 = 6.3395e-04
Loss = 2.7579e-02, PNorm = 139.0950, GNorm = 0.3366, lr_0 = 6.3351e-04
Loss = 2.8194e-02, PNorm = 139.1504, GNorm = 0.2350, lr_0 = 6.3308e-04
Loss = 2.6749e-02, PNorm = 139.1984, GNorm = 0.3391, lr_0 = 6.3265e-04
Loss = 2.9260e-02, PNorm = 139.2503, GNorm = 0.3181, lr_0 = 6.3221e-04
Loss = 2.7319e-02, PNorm = 139.3039, GNorm = 0.6213, lr_0 = 6.3178e-04
Loss = 2.5279e-02, PNorm = 139.3592, GNorm = 0.2449, lr_0 = 6.3135e-04
Loss = 2.7489e-02, PNorm = 139.4125, GNorm = 0.2666, lr_0 = 6.3091e-04
Loss = 2.5844e-02, PNorm = 139.4700, GNorm = 0.3232, lr_0 = 6.3048e-04
Loss = 2.7429e-02, PNorm = 139.5246, GNorm = 0.4574, lr_0 = 6.3005e-04
Loss = 2.5257e-02, PNorm = 139.5782, GNorm = 0.5421, lr_0 = 6.2962e-04
Loss = 2.5394e-02, PNorm = 139.6234, GNorm = 0.3178, lr_0 = 6.2919e-04
Loss = 3.1007e-02, PNorm = 139.6803, GNorm = 0.4900, lr_0 = 6.2876e-04
Loss = 2.4685e-02, PNorm = 139.7348, GNorm = 0.2757, lr_0 = 6.2833e-04
Loss = 2.5173e-02, PNorm = 139.7891, GNorm = 0.3473, lr_0 = 6.2789e-04
Loss = 2.6161e-02, PNorm = 139.8408, GNorm = 0.2572, lr_0 = 6.2746e-04
Loss = 3.3734e-02, PNorm = 139.8947, GNorm = 0.3783, lr_0 = 6.2703e-04
Loss = 3.0545e-02, PNorm = 139.9531, GNorm = 0.9351, lr_0 = 6.2661e-04
Loss = 2.9939e-02, PNorm = 140.0111, GNorm = 0.8395, lr_0 = 6.2618e-04
Loss = 3.2140e-02, PNorm = 140.0783, GNorm = 0.2380, lr_0 = 6.2575e-04
Loss = 3.6609e-02, PNorm = 140.1385, GNorm = 0.8063, lr_0 = 6.2532e-04
Loss = 2.6884e-02, PNorm = 140.2039, GNorm = 0.1676, lr_0 = 6.2489e-04
Loss = 2.4676e-02, PNorm = 140.2651, GNorm = 0.3472, lr_0 = 6.2446e-04
Loss = 2.5778e-02, PNorm = 140.3208, GNorm = 0.3151, lr_0 = 6.2403e-04
Loss = 3.0751e-02, PNorm = 140.3715, GNorm = 0.5147, lr_0 = 6.2361e-04
Loss = 2.5971e-02, PNorm = 140.4325, GNorm = 0.5193, lr_0 = 6.2318e-04
Loss = 2.6941e-02, PNorm = 140.4870, GNorm = 0.4171, lr_0 = 6.2275e-04
Loss = 3.7522e-02, PNorm = 140.5425, GNorm = 0.2689, lr_0 = 6.2233e-04
Loss = 2.8222e-02, PNorm = 140.6079, GNorm = 0.6551, lr_0 = 6.2190e-04
Loss = 2.8774e-02, PNorm = 140.6689, GNorm = 0.2605, lr_0 = 6.2147e-04
Loss = 2.7857e-02, PNorm = 140.7251, GNorm = 0.2658, lr_0 = 6.2105e-04
Loss = 2.5611e-02, PNorm = 140.7893, GNorm = 0.5475, lr_0 = 6.2062e-04
Loss = 2.9001e-02, PNorm = 140.8521, GNorm = 0.3235, lr_0 = 6.2020e-04
Loss = 3.3264e-02, PNorm = 140.9072, GNorm = 0.5546, lr_0 = 6.1977e-04
Loss = 2.9637e-02, PNorm = 140.9666, GNorm = 0.7483, lr_0 = 6.1935e-04
Loss = 2.3689e-02, PNorm = 141.0246, GNorm = 0.3407, lr_0 = 6.1892e-04
Loss = 2.4060e-02, PNorm = 141.0726, GNorm = 0.2499, lr_0 = 6.1850e-04
Loss = 3.2885e-02, PNorm = 141.1228, GNorm = 0.3773, lr_0 = 6.1808e-04
Loss = 3.0151e-02, PNorm = 141.1815, GNorm = 0.5631, lr_0 = 6.1765e-04
Loss = 2.5816e-02, PNorm = 141.2393, GNorm = 0.4417, lr_0 = 6.1723e-04
Loss = 2.7055e-02, PNorm = 141.2898, GNorm = 0.6240, lr_0 = 6.1681e-04
Loss = 2.3694e-02, PNorm = 141.3447, GNorm = 0.6147, lr_0 = 6.1638e-04
Loss = 2.9811e-02, PNorm = 141.4017, GNorm = 0.7226, lr_0 = 6.1596e-04
Loss = 2.4640e-02, PNorm = 141.4629, GNorm = 0.2355, lr_0 = 6.1554e-04
Loss = 2.5863e-02, PNorm = 141.5203, GNorm = 0.5089, lr_0 = 6.1512e-04
Loss = 2.5290e-02, PNorm = 141.5745, GNorm = 0.3593, lr_0 = 6.1470e-04
Loss = 2.8588e-02, PNorm = 141.6328, GNorm = 0.7184, lr_0 = 6.1428e-04
Loss = 2.6141e-02, PNorm = 141.6905, GNorm = 0.5812, lr_0 = 6.1385e-04
Loss = 3.7811e-02, PNorm = 141.7482, GNorm = 0.2484, lr_0 = 6.1343e-04
Loss = 2.6655e-02, PNorm = 141.8018, GNorm = 0.2521, lr_0 = 6.1301e-04
Loss = 2.6258e-02, PNorm = 141.8546, GNorm = 1.0330, lr_0 = 6.1259e-04
Loss = 2.6907e-02, PNorm = 141.9093, GNorm = 0.2344, lr_0 = 6.1217e-04
Loss = 3.0923e-02, PNorm = 141.9693, GNorm = 0.5868, lr_0 = 6.1175e-04
Loss = 3.1839e-02, PNorm = 142.0303, GNorm = 0.1464, lr_0 = 6.1134e-04
Loss = 2.9181e-02, PNorm = 142.0915, GNorm = 0.3440, lr_0 = 6.1092e-04
Loss = 3.2552e-02, PNorm = 142.1599, GNorm = 0.2800, lr_0 = 6.1050e-04
Validation mae = 0.285451
Epoch 8
Loss = 2.8453e-02, PNorm = 142.2193, GNorm = 0.1757, lr_0 = 6.1008e-04
Loss = 2.3844e-02, PNorm = 142.2726, GNorm = 0.4207, lr_0 = 6.0966e-04
Loss = 2.5358e-02, PNorm = 142.3213, GNorm = 0.2776, lr_0 = 6.0924e-04
Loss = 2.1943e-02, PNorm = 142.3686, GNorm = 0.4482, lr_0 = 6.0883e-04
Loss = 2.1263e-02, PNorm = 142.4120, GNorm = 0.4986, lr_0 = 6.0841e-04
Loss = 2.4678e-02, PNorm = 142.4479, GNorm = 0.4716, lr_0 = 6.0799e-04
Loss = 2.0928e-02, PNorm = 142.4804, GNorm = 0.4576, lr_0 = 6.0758e-04
Loss = 2.4763e-02, PNorm = 142.5225, GNorm = 0.9356, lr_0 = 6.0716e-04
Loss = 2.6626e-02, PNorm = 142.5586, GNorm = 0.6481, lr_0 = 6.0674e-04
Loss = 2.2414e-02, PNorm = 142.6030, GNorm = 0.5317, lr_0 = 6.0633e-04
Loss = 2.1272e-02, PNorm = 142.6416, GNorm = 0.4128, lr_0 = 6.0591e-04
Loss = 2.6989e-02, PNorm = 142.6738, GNorm = 0.4302, lr_0 = 6.0550e-04
Loss = 2.3825e-02, PNorm = 142.7208, GNorm = 0.1957, lr_0 = 6.0508e-04
Loss = 2.5397e-02, PNorm = 142.7576, GNorm = 0.7285, lr_0 = 6.0467e-04
Loss = 2.6993e-02, PNorm = 142.8103, GNorm = 0.6348, lr_0 = 6.0425e-04
Loss = 2.8133e-02, PNorm = 142.8537, GNorm = 0.4959, lr_0 = 6.0384e-04
Loss = 2.4557e-02, PNorm = 142.9064, GNorm = 0.2927, lr_0 = 6.0343e-04
Loss = 2.2133e-02, PNorm = 142.9471, GNorm = 0.4378, lr_0 = 6.0301e-04
Loss = 1.8653e-02, PNorm = 142.9901, GNorm = 0.3894, lr_0 = 6.0260e-04
Loss = 2.0853e-02, PNorm = 143.0282, GNorm = 0.2966, lr_0 = 6.0219e-04
Loss = 2.2145e-02, PNorm = 143.0636, GNorm = 0.3365, lr_0 = 6.0178e-04
Loss = 2.0890e-02, PNorm = 143.1052, GNorm = 0.4146, lr_0 = 6.0136e-04
Loss = 2.5857e-02, PNorm = 143.1484, GNorm = 0.4421, lr_0 = 6.0095e-04
Loss = 1.9589e-02, PNorm = 143.1899, GNorm = 0.2228, lr_0 = 6.0054e-04
Loss = 2.1520e-02, PNorm = 143.2331, GNorm = 0.5174, lr_0 = 6.0013e-04
Loss = 2.0003e-02, PNorm = 143.2744, GNorm = 0.2354, lr_0 = 5.9972e-04
Loss = 1.8837e-02, PNorm = 143.3118, GNorm = 0.2142, lr_0 = 5.9931e-04
Loss = 1.8242e-02, PNorm = 143.3550, GNorm = 0.4356, lr_0 = 5.9890e-04
Loss = 1.7916e-02, PNorm = 143.3941, GNorm = 0.3566, lr_0 = 5.9849e-04
Loss = 1.7028e-02, PNorm = 143.4290, GNorm = 0.4541, lr_0 = 5.9808e-04
Loss = 2.1594e-02, PNorm = 143.4668, GNorm = 0.5108, lr_0 = 5.9767e-04
Loss = 2.3967e-02, PNorm = 143.5142, GNorm = 0.4545, lr_0 = 5.9726e-04
Loss = 1.9667e-02, PNorm = 143.5589, GNorm = 0.2866, lr_0 = 5.9685e-04
Loss = 2.2228e-02, PNorm = 143.6032, GNorm = 0.2522, lr_0 = 5.9644e-04
Loss = 2.3235e-02, PNorm = 143.6406, GNorm = 0.7379, lr_0 = 5.9603e-04
Loss = 2.1180e-02, PNorm = 143.6844, GNorm = 0.4110, lr_0 = 5.9562e-04
Loss = 1.9748e-02, PNorm = 143.7255, GNorm = 0.2742, lr_0 = 5.9521e-04
Loss = 2.2381e-02, PNorm = 143.7642, GNorm = 0.2406, lr_0 = 5.9481e-04
Loss = 2.0526e-02, PNorm = 143.8112, GNorm = 0.6897, lr_0 = 5.9440e-04
Loss = 2.0800e-02, PNorm = 143.8545, GNorm = 0.4657, lr_0 = 5.9399e-04
Loss = 1.8010e-02, PNorm = 143.8978, GNorm = 0.1779, lr_0 = 5.9358e-04
Loss = 1.9609e-02, PNorm = 143.9360, GNorm = 0.1582, lr_0 = 5.9318e-04
Loss = 2.2546e-02, PNorm = 143.9774, GNorm = 0.1984, lr_0 = 5.9277e-04
Loss = 2.0240e-02, PNorm = 144.0207, GNorm = 0.1377, lr_0 = 5.9236e-04
Loss = 2.2710e-02, PNorm = 144.0615, GNorm = 0.3354, lr_0 = 5.9196e-04
Loss = 2.5112e-02, PNorm = 144.1023, GNorm = 0.4215, lr_0 = 5.9155e-04
Loss = 2.3120e-02, PNorm = 144.1485, GNorm = 0.2388, lr_0 = 5.9115e-04
Loss = 1.8688e-02, PNorm = 144.1905, GNorm = 0.3494, lr_0 = 5.9074e-04
Loss = 2.4073e-02, PNorm = 144.2328, GNorm = 0.2826, lr_0 = 5.9034e-04
Loss = 2.1578e-02, PNorm = 144.2749, GNorm = 0.4522, lr_0 = 5.8993e-04
Loss = 3.0166e-02, PNorm = 144.3171, GNorm = 0.1916, lr_0 = 5.8953e-04
Loss = 2.0426e-02, PNorm = 144.3607, GNorm = 0.3227, lr_0 = 5.8913e-04
Loss = 2.3835e-02, PNorm = 144.4067, GNorm = 0.2490, lr_0 = 5.8872e-04
Loss = 2.0137e-02, PNorm = 144.4505, GNorm = 0.1417, lr_0 = 5.8832e-04
Loss = 1.9953e-02, PNorm = 144.4942, GNorm = 0.3542, lr_0 = 5.8792e-04
Loss = 2.3123e-02, PNorm = 144.5408, GNorm = 0.5355, lr_0 = 5.8751e-04
Loss = 2.1553e-02, PNorm = 144.5878, GNorm = 0.2073, lr_0 = 5.8711e-04
Loss = 2.5188e-02, PNorm = 144.6326, GNorm = 0.5649, lr_0 = 5.8671e-04
Loss = 2.1695e-02, PNorm = 144.6831, GNorm = 0.5013, lr_0 = 5.8631e-04
Loss = 1.9841e-02, PNorm = 144.7265, GNorm = 0.3773, lr_0 = 5.8591e-04
Loss = 1.9896e-02, PNorm = 144.7715, GNorm = 0.4050, lr_0 = 5.8550e-04
Loss = 1.8958e-02, PNorm = 144.8131, GNorm = 0.3516, lr_0 = 5.8510e-04
Loss = 1.7963e-02, PNorm = 144.8558, GNorm = 0.5794, lr_0 = 5.8470e-04
Loss = 2.0766e-02, PNorm = 144.8966, GNorm = 0.2459, lr_0 = 5.8430e-04
Loss = 2.3118e-02, PNorm = 144.9431, GNorm = 0.2027, lr_0 = 5.8390e-04
Loss = 1.9487e-02, PNorm = 144.9886, GNorm = 0.4518, lr_0 = 5.8350e-04
Loss = 1.9651e-02, PNorm = 145.0327, GNorm = 0.3183, lr_0 = 5.8310e-04
Loss = 2.1362e-02, PNorm = 145.0777, GNorm = 0.4681, lr_0 = 5.8270e-04
Loss = 2.0235e-02, PNorm = 145.1223, GNorm = 0.1707, lr_0 = 5.8230e-04
Loss = 2.3230e-02, PNorm = 145.1648, GNorm = 0.1816, lr_0 = 5.8190e-04
Loss = 2.1757e-02, PNorm = 145.2117, GNorm = 0.1938, lr_0 = 5.8151e-04
Loss = 2.1912e-02, PNorm = 145.2595, GNorm = 0.4589, lr_0 = 5.8111e-04
Loss = 1.6788e-02, PNorm = 145.3034, GNorm = 0.2583, lr_0 = 5.8071e-04
Loss = 1.9279e-02, PNorm = 145.3467, GNorm = 0.5660, lr_0 = 5.8031e-04
Loss = 1.9428e-02, PNorm = 145.3957, GNorm = 0.2435, lr_0 = 5.7991e-04
Loss = 2.1535e-02, PNorm = 145.4447, GNorm = 0.4235, lr_0 = 5.7952e-04
Loss = 2.1483e-02, PNorm = 145.4932, GNorm = 0.3118, lr_0 = 5.7912e-04
Loss = 2.1047e-02, PNorm = 145.5369, GNorm = 0.4187, lr_0 = 5.7872e-04
Loss = 2.1374e-02, PNorm = 145.5863, GNorm = 0.2704, lr_0 = 5.7833e-04
Loss = 1.5848e-02, PNorm = 145.6303, GNorm = 0.1846, lr_0 = 5.7793e-04
Loss = 2.1225e-02, PNorm = 145.6696, GNorm = 0.5904, lr_0 = 5.7753e-04
Loss = 2.2503e-02, PNorm = 145.7164, GNorm = 0.1793, lr_0 = 5.7714e-04
Loss = 2.1739e-02, PNorm = 145.7638, GNorm = 0.2723, lr_0 = 5.7674e-04
Loss = 2.2381e-02, PNorm = 145.8054, GNorm = 0.6138, lr_0 = 5.7635e-04
Loss = 2.5821e-02, PNorm = 145.8497, GNorm = 0.3773, lr_0 = 5.7595e-04
Loss = 2.2143e-02, PNorm = 145.9008, GNorm = 0.3127, lr_0 = 5.7556e-04
Loss = 2.0112e-02, PNorm = 145.9503, GNorm = 0.3208, lr_0 = 5.7516e-04
Loss = 2.6356e-02, PNorm = 146.0027, GNorm = 0.5233, lr_0 = 5.7477e-04
Loss = 2.6122e-02, PNorm = 146.0559, GNorm = 0.7075, lr_0 = 5.7438e-04
Loss = 2.4106e-02, PNorm = 146.1148, GNorm = 0.3300, lr_0 = 5.7398e-04
Loss = 2.4337e-02, PNorm = 146.1663, GNorm = 0.5039, lr_0 = 5.7359e-04
Loss = 2.3064e-02, PNorm = 146.2186, GNorm = 0.2821, lr_0 = 5.7320e-04
Loss = 2.3271e-02, PNorm = 146.2691, GNorm = 0.4246, lr_0 = 5.7280e-04
Loss = 1.9276e-02, PNorm = 146.3275, GNorm = 0.5197, lr_0 = 5.7241e-04
Loss = 2.2089e-02, PNorm = 146.3774, GNorm = 0.6713, lr_0 = 5.7202e-04
Loss = 2.2043e-02, PNorm = 146.4313, GNorm = 0.4608, lr_0 = 5.7163e-04
Loss = 2.5484e-02, PNorm = 146.4857, GNorm = 0.3007, lr_0 = 5.7124e-04
Loss = 2.2559e-02, PNorm = 146.5372, GNorm = 0.2579, lr_0 = 5.7084e-04
Loss = 2.2481e-02, PNorm = 146.5868, GNorm = 0.5016, lr_0 = 5.7045e-04
Loss = 2.3010e-02, PNorm = 146.6352, GNorm = 0.3395, lr_0 = 5.7006e-04
Loss = 2.4889e-02, PNorm = 146.6786, GNorm = 1.2438, lr_0 = 5.6967e-04
Loss = 2.5224e-02, PNorm = 146.7339, GNorm = 0.4472, lr_0 = 5.6928e-04
Loss = 2.2757e-02, PNorm = 146.7855, GNorm = 0.4348, lr_0 = 5.6889e-04
Loss = 2.1782e-02, PNorm = 146.8404, GNorm = 0.4898, lr_0 = 5.6850e-04
Loss = 3.0733e-02, PNorm = 146.8878, GNorm = 0.3614, lr_0 = 5.6811e-04
Loss = 2.4172e-02, PNorm = 146.9468, GNorm = 0.2934, lr_0 = 5.6772e-04
Loss = 2.5338e-02, PNorm = 147.0025, GNorm = 0.2796, lr_0 = 5.6733e-04
Loss = 2.3436e-02, PNorm = 147.0532, GNorm = 0.5445, lr_0 = 5.6695e-04
Loss = 2.4650e-02, PNorm = 147.1073, GNorm = 0.8540, lr_0 = 5.6656e-04
Loss = 2.5181e-02, PNorm = 147.1611, GNorm = 0.3043, lr_0 = 5.6617e-04
Loss = 2.1433e-02, PNorm = 147.2191, GNorm = 0.2164, lr_0 = 5.6578e-04
Loss = 2.4307e-02, PNorm = 147.2717, GNorm = 0.4387, lr_0 = 5.6539e-04
Loss = 2.1264e-02, PNorm = 147.3205, GNorm = 0.6166, lr_0 = 5.6501e-04
Loss = 2.0132e-02, PNorm = 147.3626, GNorm = 0.3072, lr_0 = 5.6462e-04
Loss = 2.1347e-02, PNorm = 147.4115, GNorm = 0.2403, lr_0 = 5.6423e-04
Loss = 2.6487e-02, PNorm = 147.4689, GNorm = 0.2328, lr_0 = 5.6385e-04
Loss = 2.1665e-02, PNorm = 147.5260, GNorm = 0.5720, lr_0 = 5.6346e-04
Loss = 2.1889e-02, PNorm = 147.5767, GNorm = 0.2904, lr_0 = 5.6307e-04
Loss = 2.0788e-02, PNorm = 147.6298, GNorm = 0.5651, lr_0 = 5.6269e-04
Loss = 2.2810e-02, PNorm = 147.6801, GNorm = 0.3689, lr_0 = 5.6230e-04
Validation mae = 0.285280
Epoch 9
Loss = 2.0102e-02, PNorm = 147.7235, GNorm = 0.4435, lr_0 = 5.6192e-04
Loss = 2.0364e-02, PNorm = 147.7572, GNorm = 0.2567, lr_0 = 5.6153e-04
Loss = 1.8062e-02, PNorm = 147.7914, GNorm = 0.7277, lr_0 = 5.6115e-04
Loss = 1.6096e-02, PNorm = 147.8222, GNorm = 0.3521, lr_0 = 5.6076e-04
Loss = 2.2133e-02, PNorm = 147.8528, GNorm = 0.1826, lr_0 = 5.6038e-04
Loss = 1.6765e-02, PNorm = 147.8823, GNorm = 0.2163, lr_0 = 5.6000e-04
Loss = 1.5750e-02, PNorm = 147.9078, GNorm = 0.2078, lr_0 = 5.5961e-04
Loss = 2.0288e-02, PNorm = 147.9383, GNorm = 0.3177, lr_0 = 5.5923e-04
Loss = 1.5539e-02, PNorm = 147.9678, GNorm = 0.2804, lr_0 = 5.5885e-04
Loss = 1.6242e-02, PNorm = 147.9965, GNorm = 0.4702, lr_0 = 5.5846e-04
Loss = 1.8333e-02, PNorm = 148.0342, GNorm = 0.5854, lr_0 = 5.5808e-04
Loss = 2.0173e-02, PNorm = 148.0650, GNorm = 0.2463, lr_0 = 5.5770e-04
Loss = 1.6727e-02, PNorm = 148.1033, GNorm = 0.3653, lr_0 = 5.5732e-04
Loss = 1.6917e-02, PNorm = 148.1390, GNorm = 0.4917, lr_0 = 5.5693e-04
Loss = 1.8864e-02, PNorm = 148.1741, GNorm = 0.2845, lr_0 = 5.5655e-04
Loss = 1.6024e-02, PNorm = 148.1987, GNorm = 0.4812, lr_0 = 5.5617e-04
Loss = 1.5848e-02, PNorm = 148.2273, GNorm = 0.4877, lr_0 = 5.5579e-04
Loss = 1.6191e-02, PNorm = 148.2566, GNorm = 0.3016, lr_0 = 5.5541e-04
Loss = 1.6229e-02, PNorm = 148.2895, GNorm = 0.3035, lr_0 = 5.5503e-04
Loss = 1.9029e-02, PNorm = 148.3204, GNorm = 0.3394, lr_0 = 5.5465e-04
Loss = 1.7028e-02, PNorm = 148.3528, GNorm = 0.4165, lr_0 = 5.5427e-04
Loss = 1.7551e-02, PNorm = 148.3845, GNorm = 0.7049, lr_0 = 5.5389e-04
Loss = 1.8377e-02, PNorm = 148.4152, GNorm = 0.4151, lr_0 = 5.5351e-04
Loss = 1.6421e-02, PNorm = 148.4479, GNorm = 0.3605, lr_0 = 5.5313e-04
Loss = 1.5281e-02, PNorm = 148.4821, GNorm = 0.3501, lr_0 = 5.5275e-04
Loss = 1.9493e-02, PNorm = 148.5185, GNorm = 0.2064, lr_0 = 5.5237e-04
Loss = 1.7724e-02, PNorm = 148.5505, GNorm = 0.1596, lr_0 = 5.5199e-04
Loss = 1.5608e-02, PNorm = 148.5824, GNorm = 0.3520, lr_0 = 5.5162e-04
Loss = 1.6921e-02, PNorm = 148.6084, GNorm = 0.2576, lr_0 = 5.5124e-04
Loss = 1.5590e-02, PNorm = 148.6404, GNorm = 0.4451, lr_0 = 5.5086e-04
Loss = 1.6674e-02, PNorm = 148.6743, GNorm = 0.5117, lr_0 = 5.5048e-04
Loss = 1.4827e-02, PNorm = 148.7066, GNorm = 0.1363, lr_0 = 5.5011e-04
Loss = 1.5340e-02, PNorm = 148.7351, GNorm = 0.4725, lr_0 = 5.4973e-04
Loss = 2.1010e-02, PNorm = 148.7684, GNorm = 0.7277, lr_0 = 5.4935e-04
Loss = 1.7661e-02, PNorm = 148.8078, GNorm = 0.3846, lr_0 = 5.4898e-04
Loss = 1.6523e-02, PNorm = 148.8399, GNorm = 0.1934, lr_0 = 5.4860e-04
Loss = 1.8523e-02, PNorm = 148.8745, GNorm = 0.3944, lr_0 = 5.4822e-04
Loss = 1.6575e-02, PNorm = 148.9106, GNorm = 0.2275, lr_0 = 5.4785e-04
Loss = 1.6955e-02, PNorm = 148.9451, GNorm = 0.3576, lr_0 = 5.4747e-04
Loss = 1.6029e-02, PNorm = 148.9811, GNorm = 0.4629, lr_0 = 5.4710e-04
Loss = 1.5314e-02, PNorm = 149.0125, GNorm = 0.1573, lr_0 = 5.4672e-04
Loss = 2.0185e-02, PNorm = 149.0416, GNorm = 0.1619, lr_0 = 5.4635e-04
Loss = 1.5139e-02, PNorm = 149.0761, GNorm = 0.4576, lr_0 = 5.4597e-04
Loss = 1.7563e-02, PNorm = 149.1104, GNorm = 0.1569, lr_0 = 5.4560e-04
Loss = 1.7963e-02, PNorm = 149.1455, GNorm = 0.2661, lr_0 = 5.4523e-04
Loss = 1.6734e-02, PNorm = 149.1798, GNorm = 0.5181, lr_0 = 5.4485e-04
Loss = 1.6067e-02, PNorm = 149.2168, GNorm = 0.1857, lr_0 = 5.4448e-04
Loss = 1.8862e-02, PNorm = 149.2562, GNorm = 0.2385, lr_0 = 5.4411e-04
Loss = 1.6548e-02, PNorm = 149.2923, GNorm = 0.2595, lr_0 = 5.4373e-04
Loss = 1.8059e-02, PNorm = 149.3249, GNorm = 0.8006, lr_0 = 5.4336e-04
Loss = 1.7863e-02, PNorm = 149.3610, GNorm = 0.4522, lr_0 = 5.4299e-04
Loss = 1.8693e-02, PNorm = 149.3927, GNorm = 0.2106, lr_0 = 5.4262e-04
Loss = 2.2569e-02, PNorm = 149.4332, GNorm = 0.3414, lr_0 = 5.4225e-04
Loss = 1.8453e-02, PNorm = 149.4741, GNorm = 0.2413, lr_0 = 5.4187e-04
Loss = 1.8482e-02, PNorm = 149.5123, GNorm = 0.4182, lr_0 = 5.4150e-04
Loss = 1.7657e-02, PNorm = 149.5548, GNorm = 0.3477, lr_0 = 5.4113e-04
Loss = 1.7954e-02, PNorm = 149.5983, GNorm = 0.3767, lr_0 = 5.4076e-04
Loss = 2.1405e-02, PNorm = 149.6380, GNorm = 0.4760, lr_0 = 5.4039e-04
Loss = 1.9906e-02, PNorm = 149.6836, GNorm = 0.4105, lr_0 = 5.4002e-04
Loss = 1.8354e-02, PNorm = 149.7189, GNorm = 0.6777, lr_0 = 5.3965e-04
Loss = 1.7146e-02, PNorm = 149.7644, GNorm = 0.4841, lr_0 = 5.3928e-04
Loss = 1.9163e-02, PNorm = 149.8017, GNorm = 0.3475, lr_0 = 5.3891e-04
Loss = 1.5922e-02, PNorm = 149.8438, GNorm = 0.4453, lr_0 = 5.3854e-04
Loss = 1.7802e-02, PNorm = 149.8814, GNorm = 0.3540, lr_0 = 5.3817e-04
Loss = 1.4102e-02, PNorm = 149.9197, GNorm = 0.3847, lr_0 = 5.3781e-04
Loss = 2.0243e-02, PNorm = 149.9610, GNorm = 0.2435, lr_0 = 5.3744e-04
Loss = 1.5564e-02, PNorm = 149.9996, GNorm = 0.4175, lr_0 = 5.3707e-04
Loss = 1.8991e-02, PNorm = 150.0362, GNorm = 0.3502, lr_0 = 5.3670e-04
Loss = 1.5667e-02, PNorm = 150.0725, GNorm = 0.2567, lr_0 = 5.3633e-04
Loss = 2.0186e-02, PNorm = 150.1065, GNorm = 0.4549, lr_0 = 5.3597e-04
Loss = 1.7410e-02, PNorm = 150.1427, GNorm = 0.3431, lr_0 = 5.3560e-04
Loss = 2.3536e-02, PNorm = 150.1784, GNorm = 0.4443, lr_0 = 5.3523e-04
Loss = 1.6713e-02, PNorm = 150.2199, GNorm = 0.3164, lr_0 = 5.3486e-04
Loss = 1.5992e-02, PNorm = 150.2586, GNorm = 0.4359, lr_0 = 5.3450e-04
Loss = 1.7564e-02, PNorm = 150.2967, GNorm = 0.3553, lr_0 = 5.3413e-04
Loss = 1.7476e-02, PNorm = 150.3368, GNorm = 0.9552, lr_0 = 5.3377e-04
Loss = 1.7690e-02, PNorm = 150.3718, GNorm = 0.2759, lr_0 = 5.3340e-04
Loss = 2.0764e-02, PNorm = 150.4144, GNorm = 0.2492, lr_0 = 5.3304e-04
Loss = 1.9014e-02, PNorm = 150.4563, GNorm = 0.5955, lr_0 = 5.3267e-04
Loss = 1.9776e-02, PNorm = 150.5010, GNorm = 0.3137, lr_0 = 5.3231e-04
Loss = 1.7217e-02, PNorm = 150.5368, GNorm = 0.3607, lr_0 = 5.3194e-04
Loss = 1.6908e-02, PNorm = 150.5742, GNorm = 0.2941, lr_0 = 5.3158e-04
Loss = 1.8884e-02, PNorm = 150.6116, GNorm = 0.2643, lr_0 = 5.3121e-04
Loss = 1.4912e-02, PNorm = 150.6457, GNorm = 0.4961, lr_0 = 5.3085e-04
Loss = 1.8005e-02, PNorm = 150.6796, GNorm = 0.3115, lr_0 = 5.3048e-04
Loss = 1.5607e-02, PNorm = 150.7141, GNorm = 0.6319, lr_0 = 5.3012e-04
Loss = 2.0711e-02, PNorm = 150.7531, GNorm = 0.3889, lr_0 = 5.2976e-04
Loss = 1.7345e-02, PNorm = 150.7912, GNorm = 0.1883, lr_0 = 5.2939e-04
Loss = 1.5452e-02, PNorm = 150.8290, GNorm = 0.1371, lr_0 = 5.2903e-04
Loss = 1.5956e-02, PNorm = 150.8639, GNorm = 0.4527, lr_0 = 5.2867e-04
Loss = 1.5953e-02, PNorm = 150.9036, GNorm = 0.7985, lr_0 = 5.2831e-04
Loss = 2.0827e-02, PNorm = 150.9446, GNorm = 0.1826, lr_0 = 5.2795e-04
Loss = 1.7385e-02, PNorm = 150.9888, GNorm = 0.7962, lr_0 = 5.2758e-04
Loss = 1.5387e-02, PNorm = 151.0242, GNorm = 0.1783, lr_0 = 5.2722e-04
Loss = 1.8920e-02, PNorm = 151.0634, GNorm = 0.3119, lr_0 = 5.2686e-04
Loss = 1.7033e-02, PNorm = 151.1006, GNorm = 0.4297, lr_0 = 5.2650e-04
Loss = 1.7685e-02, PNorm = 151.1423, GNorm = 0.2981, lr_0 = 5.2614e-04
Loss = 1.6076e-02, PNorm = 151.1834, GNorm = 0.1809, lr_0 = 5.2578e-04
Loss = 1.7776e-02, PNorm = 151.2244, GNorm = 0.4379, lr_0 = 5.2542e-04
Loss = 2.1908e-02, PNorm = 151.2589, GNorm = 0.1831, lr_0 = 5.2506e-04
Loss = 1.4258e-02, PNorm = 151.2941, GNorm = 0.1477, lr_0 = 5.2470e-04
Loss = 1.9420e-02, PNorm = 151.3344, GNorm = 0.2838, lr_0 = 5.2434e-04
Loss = 1.8605e-02, PNorm = 151.3750, GNorm = 0.2032, lr_0 = 5.2398e-04
Loss = 1.8538e-02, PNorm = 151.4158, GNorm = 0.5043, lr_0 = 5.2362e-04
Loss = 1.7949e-02, PNorm = 151.4575, GNorm = 0.4134, lr_0 = 5.2326e-04
Loss = 1.6689e-02, PNorm = 151.4969, GNorm = 0.1669, lr_0 = 5.2290e-04
Loss = 1.8756e-02, PNorm = 151.5426, GNorm = 0.2071, lr_0 = 5.2255e-04
Loss = 1.8742e-02, PNorm = 151.5785, GNorm = 0.5875, lr_0 = 5.2219e-04
Loss = 1.7706e-02, PNorm = 151.6203, GNorm = 0.4808, lr_0 = 5.2183e-04
Loss = 1.9174e-02, PNorm = 151.6596, GNorm = 0.1936, lr_0 = 5.2147e-04
Loss = 1.6988e-02, PNorm = 151.7019, GNorm = 0.6806, lr_0 = 5.2112e-04
Loss = 1.6469e-02, PNorm = 151.7447, GNorm = 0.4014, lr_0 = 5.2076e-04
Loss = 1.7423e-02, PNorm = 151.7869, GNorm = 0.4696, lr_0 = 5.2040e-04
Loss = 1.4622e-02, PNorm = 151.8249, GNorm = 0.1999, lr_0 = 5.2005e-04
Loss = 1.7612e-02, PNorm = 151.8591, GNorm = 0.3420, lr_0 = 5.1969e-04
Loss = 1.7321e-02, PNorm = 151.8919, GNorm = 0.3748, lr_0 = 5.1933e-04
Loss = 1.7511e-02, PNorm = 151.9294, GNorm = 0.3868, lr_0 = 5.1898e-04
Loss = 1.6821e-02, PNorm = 151.9738, GNorm = 0.3496, lr_0 = 5.1862e-04
Loss = 1.9925e-02, PNorm = 152.0091, GNorm = 0.3645, lr_0 = 5.1827e-04
Loss = 1.7713e-02, PNorm = 152.0490, GNorm = 0.3344, lr_0 = 5.1791e-04
Validation mae = 0.281815
Epoch 10
Loss = 1.5818e-02, PNorm = 152.0804, GNorm = 0.4383, lr_0 = 5.1756e-04
Loss = 1.7129e-02, PNorm = 152.1077, GNorm = 0.1994, lr_0 = 5.1720e-04
Loss = 1.3850e-02, PNorm = 152.1322, GNorm = 0.3348, lr_0 = 5.1685e-04
Loss = 1.7493e-02, PNorm = 152.1614, GNorm = 0.5239, lr_0 = 5.1649e-04
Loss = 1.7002e-02, PNorm = 152.1891, GNorm = 0.2890, lr_0 = 5.1614e-04
Loss = 1.2322e-02, PNorm = 152.2224, GNorm = 0.1590, lr_0 = 5.1579e-04
Loss = 1.5476e-02, PNorm = 152.2509, GNorm = 0.1333, lr_0 = 5.1543e-04
Loss = 1.4236e-02, PNorm = 152.2768, GNorm = 0.5449, lr_0 = 5.1508e-04
Loss = 1.3711e-02, PNorm = 152.3015, GNorm = 0.5820, lr_0 = 5.1473e-04
Loss = 1.3862e-02, PNorm = 152.3254, GNorm = 0.4497, lr_0 = 5.1437e-04
Loss = 1.5295e-02, PNorm = 152.3540, GNorm = 0.2998, lr_0 = 5.1402e-04
Loss = 2.0129e-02, PNorm = 152.3839, GNorm = 0.3892, lr_0 = 5.1367e-04
Loss = 1.3406e-02, PNorm = 152.4147, GNorm = 0.2616, lr_0 = 5.1332e-04
Loss = 2.1178e-02, PNorm = 152.4396, GNorm = 0.9172, lr_0 = 5.1297e-04
Loss = 1.4189e-02, PNorm = 152.4677, GNorm = 0.2786, lr_0 = 5.1262e-04
Loss = 1.5192e-02, PNorm = 152.4979, GNorm = 0.2046, lr_0 = 5.1226e-04
Loss = 1.6127e-02, PNorm = 152.5256, GNorm = 0.4201, lr_0 = 5.1191e-04
Loss = 1.3338e-02, PNorm = 152.5554, GNorm = 0.1792, lr_0 = 5.1156e-04
Loss = 1.4067e-02, PNorm = 152.5899, GNorm = 0.1804, lr_0 = 5.1121e-04
Loss = 1.9133e-02, PNorm = 152.6191, GNorm = 0.3476, lr_0 = 5.1086e-04
Loss = 1.1666e-02, PNorm = 152.6463, GNorm = 0.2097, lr_0 = 5.1051e-04
Loss = 1.6200e-02, PNorm = 152.6672, GNorm = 0.3988, lr_0 = 5.1016e-04
Loss = 1.3270e-02, PNorm = 152.6956, GNorm = 0.4355, lr_0 = 5.0981e-04
Loss = 1.4081e-02, PNorm = 152.7213, GNorm = 0.4759, lr_0 = 5.0946e-04
Loss = 1.3291e-02, PNorm = 152.7510, GNorm = 0.3265, lr_0 = 5.0911e-04
Loss = 1.6861e-02, PNorm = 152.7824, GNorm = 0.2947, lr_0 = 5.0877e-04
Loss = 1.6582e-02, PNorm = 152.8087, GNorm = 0.3432, lr_0 = 5.0842e-04
Loss = 1.3759e-02, PNorm = 152.8387, GNorm = 0.2513, lr_0 = 5.0807e-04
Loss = 1.4415e-02, PNorm = 152.8720, GNorm = 0.2027, lr_0 = 5.0772e-04
Loss = 1.4580e-02, PNorm = 152.9020, GNorm = 0.2876, lr_0 = 5.0737e-04
Loss = 1.2242e-02, PNorm = 152.9316, GNorm = 0.2542, lr_0 = 5.0703e-04
Loss = 1.5247e-02, PNorm = 152.9575, GNorm = 0.3628, lr_0 = 5.0668e-04
Loss = 1.3370e-02, PNorm = 152.9876, GNorm = 0.1320, lr_0 = 5.0633e-04
Loss = 1.4802e-02, PNorm = 153.0225, GNorm = 0.4984, lr_0 = 5.0598e-04
Loss = 1.2236e-02, PNorm = 153.0548, GNorm = 0.6804, lr_0 = 5.0564e-04
Loss = 1.4602e-02, PNorm = 153.0812, GNorm = 0.3032, lr_0 = 5.0529e-04
Loss = 1.6802e-02, PNorm = 153.1074, GNorm = 0.4960, lr_0 = 5.0494e-04
Loss = 1.3428e-02, PNorm = 153.1280, GNorm = 0.3881, lr_0 = 5.0460e-04
Loss = 1.5014e-02, PNorm = 153.1548, GNorm = 0.4980, lr_0 = 5.0425e-04
Loss = 1.4763e-02, PNorm = 153.1829, GNorm = 0.2731, lr_0 = 5.0391e-04
Loss = 1.3011e-02, PNorm = 153.2165, GNorm = 0.6625, lr_0 = 5.0356e-04
Loss = 1.2872e-02, PNorm = 153.2436, GNorm = 0.4255, lr_0 = 5.0322e-04
Loss = 1.4020e-02, PNorm = 153.2735, GNorm = 0.3026, lr_0 = 5.0287e-04
Loss = 1.1596e-02, PNorm = 153.3003, GNorm = 0.1745, lr_0 = 5.0253e-04
Loss = 1.3410e-02, PNorm = 153.3280, GNorm = 0.1496, lr_0 = 5.0218e-04
Loss = 1.2912e-02, PNorm = 153.3549, GNorm = 0.3569, lr_0 = 5.0184e-04
Loss = 1.7228e-02, PNorm = 153.3892, GNorm = 0.5997, lr_0 = 5.0150e-04
Loss = 1.2253e-02, PNorm = 153.4174, GNorm = 0.5220, lr_0 = 5.0115e-04
Loss = 1.4863e-02, PNorm = 153.4431, GNorm = 0.5068, lr_0 = 5.0081e-04
Loss = 1.5702e-02, PNorm = 153.4675, GNorm = 0.4826, lr_0 = 5.0047e-04
Loss = 1.4491e-02, PNorm = 153.5003, GNorm = 0.1885, lr_0 = 5.0012e-04
Loss = 1.2382e-02, PNorm = 153.5330, GNorm = 0.2426, lr_0 = 4.9978e-04
Loss = 1.5778e-02, PNorm = 153.5647, GNorm = 0.3753, lr_0 = 4.9944e-04
Loss = 1.2648e-02, PNorm = 153.5955, GNorm = 0.6043, lr_0 = 4.9910e-04
Loss = 1.3057e-02, PNorm = 153.6254, GNorm = 0.3704, lr_0 = 4.9875e-04
Loss = 1.5959e-02, PNorm = 153.6517, GNorm = 0.3058, lr_0 = 4.9841e-04
Loss = 1.3386e-02, PNorm = 153.6758, GNorm = 0.3290, lr_0 = 4.9807e-04
Loss = 1.3528e-02, PNorm = 153.7001, GNorm = 0.1386, lr_0 = 4.9773e-04
Loss = 1.3220e-02, PNorm = 153.7289, GNorm = 0.4886, lr_0 = 4.9739e-04
Loss = 1.2413e-02, PNorm = 153.7573, GNorm = 0.1347, lr_0 = 4.9705e-04
Loss = 1.2849e-02, PNorm = 153.7838, GNorm = 0.2343, lr_0 = 4.9671e-04
Loss = 1.4715e-02, PNorm = 153.8126, GNorm = 0.4155, lr_0 = 4.9637e-04
Loss = 1.3929e-02, PNorm = 153.8446, GNorm = 0.2311, lr_0 = 4.9603e-04
Loss = 1.7449e-02, PNorm = 153.8720, GNorm = 0.3901, lr_0 = 4.9569e-04
Loss = 1.4485e-02, PNorm = 153.9080, GNorm = 0.4283, lr_0 = 4.9535e-04
Loss = 1.4522e-02, PNorm = 153.9399, GNorm = 0.5041, lr_0 = 4.9501e-04
Loss = 1.3913e-02, PNorm = 153.9720, GNorm = 0.3948, lr_0 = 4.9467e-04
Loss = 1.4864e-02, PNorm = 153.9984, GNorm = 0.2580, lr_0 = 4.9433e-04
Loss = 1.1493e-02, PNorm = 154.0270, GNorm = 0.4192, lr_0 = 4.9399e-04
Loss = 1.4445e-02, PNorm = 154.0578, GNorm = 0.2872, lr_0 = 4.9365e-04
Loss = 1.2303e-02, PNorm = 154.0891, GNorm = 0.1805, lr_0 = 4.9332e-04
Loss = 1.5431e-02, PNorm = 154.1161, GNorm = 0.1606, lr_0 = 4.9298e-04
Loss = 1.5807e-02, PNorm = 154.1473, GNorm = 0.4162, lr_0 = 4.9264e-04
Loss = 1.2886e-02, PNorm = 154.1785, GNorm = 0.6099, lr_0 = 4.9230e-04
Loss = 1.6237e-02, PNorm = 154.2099, GNorm = 0.3712, lr_0 = 4.9197e-04
Loss = 1.4889e-02, PNorm = 154.2389, GNorm = 0.2985, lr_0 = 4.9163e-04
Loss = 1.3364e-02, PNorm = 154.2732, GNorm = 0.1699, lr_0 = 4.9129e-04
Loss = 1.2956e-02, PNorm = 154.3097, GNorm = 0.3354, lr_0 = 4.9095e-04
Loss = 1.1977e-02, PNorm = 154.3364, GNorm = 0.3387, lr_0 = 4.9062e-04
Loss = 1.4850e-02, PNorm = 154.3670, GNorm = 0.1371, lr_0 = 4.9028e-04
Loss = 1.1072e-02, PNorm = 154.3999, GNorm = 0.2957, lr_0 = 4.8995e-04
Loss = 1.2729e-02, PNorm = 154.4310, GNorm = 0.2936, lr_0 = 4.8961e-04
Loss = 1.4112e-02, PNorm = 154.4612, GNorm = 0.1890, lr_0 = 4.8928e-04
Loss = 1.4556e-02, PNorm = 154.4904, GNorm = 0.3704, lr_0 = 4.8894e-04
Loss = 1.5954e-02, PNorm = 154.5197, GNorm = 0.4261, lr_0 = 4.8861e-04
Loss = 1.2289e-02, PNorm = 154.5528, GNorm = 0.2126, lr_0 = 4.8827e-04
Loss = 1.2185e-02, PNorm = 154.5870, GNorm = 0.1599, lr_0 = 4.8794e-04
Loss = 1.6078e-02, PNorm = 154.6173, GNorm = 0.6355, lr_0 = 4.8760e-04
Loss = 1.7895e-02, PNorm = 154.6478, GNorm = 0.5411, lr_0 = 4.8727e-04
Loss = 1.5517e-02, PNorm = 154.6777, GNorm = 0.2669, lr_0 = 4.8693e-04
Loss = 1.3295e-02, PNorm = 154.7121, GNorm = 0.1976, lr_0 = 4.8660e-04
Loss = 1.6519e-02, PNorm = 154.7510, GNorm = 0.4351, lr_0 = 4.8627e-04
Loss = 1.1505e-02, PNorm = 154.7889, GNorm = 0.2850, lr_0 = 4.8593e-04
Loss = 1.4558e-02, PNorm = 154.8164, GNorm = 0.4918, lr_0 = 4.8560e-04
Loss = 1.6413e-02, PNorm = 154.8411, GNorm = 0.1732, lr_0 = 4.8527e-04
Loss = 1.2992e-02, PNorm = 154.8680, GNorm = 0.1989, lr_0 = 4.8494e-04
Loss = 1.4705e-02, PNorm = 154.9023, GNorm = 0.5090, lr_0 = 4.8460e-04
Loss = 1.4492e-02, PNorm = 154.9358, GNorm = 0.1771, lr_0 = 4.8427e-04
Loss = 1.1389e-02, PNorm = 154.9721, GNorm = 0.2968, lr_0 = 4.8394e-04
Loss = 1.5967e-02, PNorm = 155.0071, GNorm = 0.1510, lr_0 = 4.8361e-04
Loss = 1.4750e-02, PNorm = 155.0400, GNorm = 0.2771, lr_0 = 4.8328e-04
Loss = 1.5963e-02, PNorm = 155.0779, GNorm = 0.6409, lr_0 = 4.8295e-04
Loss = 1.4154e-02, PNorm = 155.1124, GNorm = 0.1993, lr_0 = 4.8262e-04
Loss = 1.3637e-02, PNorm = 155.1455, GNorm = 0.4261, lr_0 = 4.8228e-04
Loss = 1.3459e-02, PNorm = 155.1787, GNorm = 0.2385, lr_0 = 4.8195e-04
Loss = 1.2227e-02, PNorm = 155.2123, GNorm = 0.2985, lr_0 = 4.8162e-04
Loss = 1.4477e-02, PNorm = 155.2429, GNorm = 0.3015, lr_0 = 4.8129e-04
Loss = 1.5257e-02, PNorm = 155.2767, GNorm = 0.1314, lr_0 = 4.8096e-04
Loss = 1.3452e-02, PNorm = 155.3125, GNorm = 0.1908, lr_0 = 4.8064e-04
Loss = 1.3944e-02, PNorm = 155.3507, GNorm = 0.4266, lr_0 = 4.8031e-04
Loss = 1.3829e-02, PNorm = 155.3861, GNorm = 0.1442, lr_0 = 4.7998e-04
Loss = 1.5912e-02, PNorm = 155.4262, GNorm = 0.1946, lr_0 = 4.7965e-04
Loss = 1.3498e-02, PNorm = 155.4647, GNorm = 0.4012, lr_0 = 4.7932e-04
Loss = 1.3923e-02, PNorm = 155.4986, GNorm = 0.2455, lr_0 = 4.7899e-04
Loss = 1.5196e-02, PNorm = 155.5356, GNorm = 0.5598, lr_0 = 4.7866e-04
Loss = 1.1660e-02, PNorm = 155.5675, GNorm = 0.3037, lr_0 = 4.7833e-04
Loss = 1.5134e-02, PNorm = 155.6013, GNorm = 0.6463, lr_0 = 4.7801e-04
Loss = 1.5925e-02, PNorm = 155.6310, GNorm = 0.8114, lr_0 = 4.7768e-04
Loss = 1.3517e-02, PNorm = 155.6633, GNorm = 0.3529, lr_0 = 4.7735e-04
Loss = 1.3227e-02, PNorm = 155.6987, GNorm = 0.5183, lr_0 = 4.7703e-04
Validation mae = 0.281069
Epoch 11
Loss = 1.4102e-02, PNorm = 155.7302, GNorm = 0.4515, lr_0 = 4.7670e-04
Loss = 1.3890e-02, PNorm = 155.7566, GNorm = 0.1485, lr_0 = 4.7637e-04
Loss = 1.1570e-02, PNorm = 155.7831, GNorm = 0.1929, lr_0 = 4.7605e-04
Loss = 1.2866e-02, PNorm = 155.8029, GNorm = 0.3642, lr_0 = 4.7572e-04
Loss = 1.2150e-02, PNorm = 155.8286, GNorm = 0.1510, lr_0 = 4.7539e-04
Loss = 1.1500e-02, PNorm = 155.8527, GNorm = 0.1925, lr_0 = 4.7507e-04
Loss = 1.2408e-02, PNorm = 155.8744, GNorm = 0.2774, lr_0 = 4.7474e-04
Loss = 1.1429e-02, PNorm = 155.8925, GNorm = 0.1795, lr_0 = 4.7442e-04
Loss = 1.3980e-02, PNorm = 155.9161, GNorm = 0.4080, lr_0 = 4.7409e-04
Loss = 1.4411e-02, PNorm = 155.9406, GNorm = 0.4628, lr_0 = 4.7377e-04
Loss = 1.1281e-02, PNorm = 155.9703, GNorm = 0.1850, lr_0 = 4.7344e-04
Loss = 1.5896e-02, PNorm = 155.9994, GNorm = 0.2437, lr_0 = 4.7312e-04
Loss = 1.1124e-02, PNorm = 156.0245, GNorm = 0.2022, lr_0 = 4.7279e-04
Loss = 1.3217e-02, PNorm = 156.0442, GNorm = 0.1465, lr_0 = 4.7247e-04
Loss = 1.4627e-02, PNorm = 156.0653, GNorm = 0.1312, lr_0 = 4.7215e-04
Loss = 1.3411e-02, PNorm = 156.0906, GNorm = 0.3819, lr_0 = 4.7182e-04
Loss = 1.2647e-02, PNorm = 156.1203, GNorm = 0.6411, lr_0 = 4.7150e-04
Loss = 1.2869e-02, PNorm = 156.1490, GNorm = 0.2504, lr_0 = 4.7118e-04
Loss = 1.2439e-02, PNorm = 156.1804, GNorm = 0.3768, lr_0 = 4.7085e-04
Loss = 1.2199e-02, PNorm = 156.2070, GNorm = 0.3446, lr_0 = 4.7053e-04
Loss = 1.0922e-02, PNorm = 156.2298, GNorm = 0.1027, lr_0 = 4.7021e-04
Loss = 1.4402e-02, PNorm = 156.2566, GNorm = 0.2384, lr_0 = 4.6989e-04
Loss = 1.0510e-02, PNorm = 156.2871, GNorm = 0.4850, lr_0 = 4.6957e-04
Loss = 1.2982e-02, PNorm = 156.3140, GNorm = 0.1819, lr_0 = 4.6924e-04
Loss = 1.2646e-02, PNorm = 156.3397, GNorm = 0.1448, lr_0 = 4.6892e-04
Loss = 1.2103e-02, PNorm = 156.3628, GNorm = 0.1953, lr_0 = 4.6860e-04
Loss = 1.4682e-02, PNorm = 156.3842, GNorm = 0.2933, lr_0 = 4.6828e-04
Loss = 1.2250e-02, PNorm = 156.4096, GNorm = 0.6216, lr_0 = 4.6796e-04
Loss = 1.2388e-02, PNorm = 156.4368, GNorm = 0.1019, lr_0 = 4.6764e-04
Loss = 1.0745e-02, PNorm = 156.4641, GNorm = 0.1882, lr_0 = 4.6732e-04
Loss = 1.1304e-02, PNorm = 156.4934, GNorm = 0.1819, lr_0 = 4.6700e-04
Loss = 1.0363e-02, PNorm = 156.5194, GNorm = 0.3305, lr_0 = 4.6668e-04
Loss = 1.1187e-02, PNorm = 156.5436, GNorm = 0.2101, lr_0 = 4.6636e-04
Loss = 1.1931e-02, PNorm = 156.5691, GNorm = 0.5532, lr_0 = 4.6604e-04
Loss = 1.1318e-02, PNorm = 156.5962, GNorm = 0.2072, lr_0 = 4.6572e-04
Loss = 1.1332e-02, PNorm = 156.6256, GNorm = 0.3086, lr_0 = 4.6540e-04
Loss = 1.1693e-02, PNorm = 156.6494, GNorm = 0.3324, lr_0 = 4.6508e-04
Loss = 1.4174e-02, PNorm = 156.6741, GNorm = 0.2283, lr_0 = 4.6476e-04
Loss = 1.0454e-02, PNorm = 156.6990, GNorm = 0.5763, lr_0 = 4.6445e-04
Loss = 9.1081e-03, PNorm = 156.7249, GNorm = 0.1641, lr_0 = 4.6413e-04
Loss = 1.0282e-02, PNorm = 156.7521, GNorm = 0.1668, lr_0 = 4.6381e-04
Loss = 1.0882e-02, PNorm = 156.7756, GNorm = 0.2441, lr_0 = 4.6349e-04
Loss = 1.0025e-02, PNorm = 156.7975, GNorm = 0.4832, lr_0 = 4.6317e-04
Loss = 1.2229e-02, PNorm = 156.8242, GNorm = 0.3702, lr_0 = 4.6286e-04
Loss = 9.7379e-03, PNorm = 156.8466, GNorm = 0.1625, lr_0 = 4.6254e-04
Loss = 1.1015e-02, PNorm = 156.8705, GNorm = 0.2517, lr_0 = 4.6222e-04
Loss = 1.1042e-02, PNorm = 156.8949, GNorm = 0.3749, lr_0 = 4.6191e-04
Loss = 1.0513e-02, PNorm = 156.9175, GNorm = 0.3674, lr_0 = 4.6159e-04
Loss = 1.4581e-02, PNorm = 156.9460, GNorm = 0.1440, lr_0 = 4.6127e-04
Loss = 1.1234e-02, PNorm = 156.9769, GNorm = 0.1295, lr_0 = 4.6096e-04
Loss = 1.3785e-02, PNorm = 157.0013, GNorm = 0.3162, lr_0 = 4.6064e-04
Loss = 1.1691e-02, PNorm = 157.0236, GNorm = 0.4210, lr_0 = 4.6033e-04
Loss = 1.1324e-02, PNorm = 157.0415, GNorm = 0.4446, lr_0 = 4.6001e-04
Loss = 1.2005e-02, PNorm = 157.0683, GNorm = 0.3583, lr_0 = 4.5970e-04
Loss = 1.0138e-02, PNorm = 157.0949, GNorm = 0.2378, lr_0 = 4.5938e-04
Loss = 1.2364e-02, PNorm = 157.1257, GNorm = 0.3208, lr_0 = 4.5907e-04
Loss = 1.0940e-02, PNorm = 157.1576, GNorm = 0.3153, lr_0 = 4.5875e-04
Loss = 1.2879e-02, PNorm = 157.1828, GNorm = 0.4792, lr_0 = 4.5844e-04
Loss = 1.0735e-02, PNorm = 157.2074, GNorm = 0.1015, lr_0 = 4.5812e-04
Loss = 1.1180e-02, PNorm = 157.2266, GNorm = 0.3317, lr_0 = 4.5781e-04
Loss = 1.2756e-02, PNorm = 157.2570, GNorm = 0.2308, lr_0 = 4.5750e-04
Loss = 1.1230e-02, PNorm = 157.2867, GNorm = 0.1493, lr_0 = 4.5718e-04
Loss = 1.0752e-02, PNorm = 157.3127, GNorm = 0.2729, lr_0 = 4.5687e-04
Loss = 1.1881e-02, PNorm = 157.3368, GNorm = 0.6170, lr_0 = 4.5656e-04
Loss = 1.0558e-02, PNorm = 157.3619, GNorm = 0.1891, lr_0 = 4.5624e-04
Loss = 1.1446e-02, PNorm = 157.3871, GNorm = 0.2998, lr_0 = 4.5593e-04
Loss = 1.1018e-02, PNorm = 157.4139, GNorm = 0.1272, lr_0 = 4.5562e-04
Loss = 1.2965e-02, PNorm = 157.4412, GNorm = 0.3133, lr_0 = 4.5531e-04
Loss = 1.1408e-02, PNorm = 157.4694, GNorm = 0.1046, lr_0 = 4.5499e-04
Loss = 1.1418e-02, PNorm = 157.4972, GNorm = 0.3548, lr_0 = 4.5468e-04
Loss = 1.0461e-02, PNorm = 157.5236, GNorm = 0.7717, lr_0 = 4.5437e-04
Loss = 1.0979e-02, PNorm = 157.5489, GNorm = 0.1155, lr_0 = 4.5406e-04
Loss = 1.1781e-02, PNorm = 157.5775, GNorm = 0.2814, lr_0 = 4.5375e-04
Loss = 1.2188e-02, PNorm = 157.6035, GNorm = 0.5847, lr_0 = 4.5344e-04
Loss = 1.3782e-02, PNorm = 157.6304, GNorm = 0.2588, lr_0 = 4.5313e-04
Loss = 1.0648e-02, PNorm = 157.6617, GNorm = 0.2757, lr_0 = 4.5282e-04
Loss = 1.1042e-02, PNorm = 157.6943, GNorm = 0.3056, lr_0 = 4.5251e-04
Loss = 1.3882e-02, PNorm = 157.7234, GNorm = 0.4833, lr_0 = 4.5220e-04
Loss = 1.5372e-02, PNorm = 157.7511, GNorm = 0.5312, lr_0 = 4.5189e-04
Loss = 1.5081e-02, PNorm = 157.7887, GNorm = 0.3278, lr_0 = 4.5158e-04
Loss = 1.4853e-02, PNorm = 157.8270, GNorm = 0.2604, lr_0 = 4.5127e-04
Loss = 1.0998e-02, PNorm = 157.8666, GNorm = 0.3259, lr_0 = 4.5096e-04
Loss = 1.1942e-02, PNorm = 157.8989, GNorm = 0.1666, lr_0 = 4.5065e-04
Loss = 1.4251e-02, PNorm = 157.9323, GNorm = 0.2397, lr_0 = 4.5034e-04
Loss = 1.2008e-02, PNorm = 157.9669, GNorm = 0.2316, lr_0 = 4.5003e-04
Loss = 1.2143e-02, PNorm = 157.9969, GNorm = 0.4838, lr_0 = 4.4972e-04
Loss = 1.3860e-02, PNorm = 158.0227, GNorm = 0.1733, lr_0 = 4.4942e-04
Loss = 1.2233e-02, PNorm = 158.0501, GNorm = 0.1882, lr_0 = 4.4911e-04
Loss = 1.0335e-02, PNorm = 158.0779, GNorm = 0.1399, lr_0 = 4.4880e-04
Loss = 1.6197e-02, PNorm = 158.1057, GNorm = 0.6178, lr_0 = 4.4849e-04
Loss = 1.0427e-02, PNorm = 158.1286, GNorm = 0.1832, lr_0 = 4.4819e-04
Loss = 1.2033e-02, PNorm = 158.1563, GNorm = 0.1437, lr_0 = 4.4788e-04
Loss = 1.0748e-02, PNorm = 158.1848, GNorm = 0.4680, lr_0 = 4.4757e-04
Loss = 1.1859e-02, PNorm = 158.2093, GNorm = 0.4301, lr_0 = 4.4727e-04
Loss = 1.4322e-02, PNorm = 158.2344, GNorm = 0.2121, lr_0 = 4.4696e-04
Loss = 1.9417e-02, PNorm = 158.2599, GNorm = 0.2773, lr_0 = 4.4665e-04
Loss = 1.1432e-02, PNorm = 158.2893, GNorm = 0.1171, lr_0 = 4.4635e-04
Loss = 1.0982e-02, PNorm = 158.3182, GNorm = 0.6397, lr_0 = 4.4604e-04
Loss = 1.2687e-02, PNorm = 158.3483, GNorm = 0.4526, lr_0 = 4.4574e-04
Loss = 1.3032e-02, PNorm = 158.3754, GNorm = 0.2228, lr_0 = 4.4543e-04
Loss = 1.1665e-02, PNorm = 158.4053, GNorm = 0.3955, lr_0 = 4.4513e-04
Loss = 1.0480e-02, PNorm = 158.4347, GNorm = 0.3956, lr_0 = 4.4482e-04
Loss = 1.3856e-02, PNorm = 158.4632, GNorm = 0.7618, lr_0 = 4.4452e-04
Loss = 1.2302e-02, PNorm = 158.4909, GNorm = 0.2019, lr_0 = 4.4421e-04
Loss = 1.0881e-02, PNorm = 158.5199, GNorm = 0.1240, lr_0 = 4.4391e-04
Loss = 1.1843e-02, PNorm = 158.5466, GNorm = 0.1905, lr_0 = 4.4360e-04
Loss = 1.2300e-02, PNorm = 158.5715, GNorm = 0.3373, lr_0 = 4.4330e-04
Loss = 1.2615e-02, PNorm = 158.6039, GNorm = 0.2631, lr_0 = 4.4299e-04
Loss = 1.0996e-02, PNorm = 158.6360, GNorm = 0.4663, lr_0 = 4.4269e-04
Loss = 1.2834e-02, PNorm = 158.6633, GNorm = 0.1253, lr_0 = 4.4239e-04
Loss = 1.3553e-02, PNorm = 158.6898, GNorm = 0.1959, lr_0 = 4.4209e-04
Loss = 1.0317e-02, PNorm = 158.7154, GNorm = 0.1283, lr_0 = 4.4178e-04
Loss = 1.2799e-02, PNorm = 158.7421, GNorm = 0.3911, lr_0 = 4.4148e-04
Loss = 1.4655e-02, PNorm = 158.7688, GNorm = 0.4023, lr_0 = 4.4118e-04
Loss = 1.2100e-02, PNorm = 158.7927, GNorm = 0.3895, lr_0 = 4.4088e-04
Loss = 1.1416e-02, PNorm = 158.8175, GNorm = 0.4948, lr_0 = 4.4057e-04
Loss = 9.5009e-03, PNorm = 158.8454, GNorm = 0.2982, lr_0 = 4.4027e-04
Loss = 1.1490e-02, PNorm = 158.8732, GNorm = 0.1867, lr_0 = 4.3997e-04
Loss = 1.1870e-02, PNorm = 158.9016, GNorm = 0.2967, lr_0 = 4.3967e-04
Loss = 1.1709e-02, PNorm = 158.9341, GNorm = 0.1827, lr_0 = 4.3937e-04
Validation mae = 0.280557
Epoch 12
Loss = 1.0577e-02, PNorm = 158.9584, GNorm = 0.2435, lr_0 = 4.3907e-04
Loss = 1.0609e-02, PNorm = 158.9813, GNorm = 0.5670, lr_0 = 4.3877e-04
Loss = 1.1661e-02, PNorm = 159.0009, GNorm = 0.3462, lr_0 = 4.3846e-04
Loss = 1.0765e-02, PNorm = 159.0225, GNorm = 0.2689, lr_0 = 4.3816e-04
Loss = 9.3768e-03, PNorm = 159.0428, GNorm = 0.2120, lr_0 = 4.3786e-04
Loss = 1.0962e-02, PNorm = 159.0635, GNorm = 0.2234, lr_0 = 4.3756e-04
Loss = 1.1082e-02, PNorm = 159.0827, GNorm = 0.2393, lr_0 = 4.3726e-04
Loss = 1.0052e-02, PNorm = 159.0982, GNorm = 0.2906, lr_0 = 4.3696e-04
Loss = 9.0185e-03, PNorm = 159.1168, GNorm = 0.1506, lr_0 = 4.3667e-04
Loss = 8.8746e-03, PNorm = 159.1350, GNorm = 0.3219, lr_0 = 4.3637e-04
Loss = 1.1034e-02, PNorm = 159.1526, GNorm = 0.1654, lr_0 = 4.3607e-04
Loss = 1.1813e-02, PNorm = 159.1671, GNorm = 0.2523, lr_0 = 4.3577e-04
Loss = 9.4548e-03, PNorm = 159.1849, GNorm = 0.2021, lr_0 = 4.3547e-04
Loss = 1.0494e-02, PNorm = 159.2069, GNorm = 0.5152, lr_0 = 4.3517e-04
Loss = 1.0334e-02, PNorm = 159.2284, GNorm = 0.1704, lr_0 = 4.3487e-04
Loss = 1.2534e-02, PNorm = 159.2478, GNorm = 0.1619, lr_0 = 4.3458e-04
Loss = 9.5219e-03, PNorm = 159.2676, GNorm = 0.1832, lr_0 = 4.3428e-04
Loss = 1.2123e-02, PNorm = 159.2871, GNorm = 0.2023, lr_0 = 4.3398e-04
Loss = 1.0655e-02, PNorm = 159.3064, GNorm = 0.2722, lr_0 = 4.3368e-04
Loss = 1.0415e-02, PNorm = 159.3253, GNorm = 0.2741, lr_0 = 4.3339e-04
Loss = 1.2887e-02, PNorm = 159.3447, GNorm = 0.4819, lr_0 = 4.3309e-04
Loss = 1.0447e-02, PNorm = 159.3648, GNorm = 0.2692, lr_0 = 4.3279e-04
Loss = 1.2014e-02, PNorm = 159.3901, GNorm = 0.1319, lr_0 = 4.3250e-04
Loss = 1.0976e-02, PNorm = 159.4113, GNorm = 0.2097, lr_0 = 4.3220e-04
Loss = 8.0665e-03, PNorm = 159.4348, GNorm = 0.1787, lr_0 = 4.3190e-04
Loss = 9.8975e-03, PNorm = 159.4578, GNorm = 0.2271, lr_0 = 4.3161e-04
Loss = 9.5818e-03, PNorm = 159.4825, GNorm = 0.2227, lr_0 = 4.3131e-04
Loss = 9.7883e-03, PNorm = 159.5020, GNorm = 0.1287, lr_0 = 4.3102e-04
Loss = 1.1034e-02, PNorm = 159.5225, GNorm = 0.4292, lr_0 = 4.3072e-04
Loss = 9.4656e-03, PNorm = 159.5421, GNorm = 0.2590, lr_0 = 4.3043e-04
Loss = 1.0271e-02, PNorm = 159.5623, GNorm = 0.1442, lr_0 = 4.3013e-04
Loss = 1.0027e-02, PNorm = 159.5808, GNorm = 0.4132, lr_0 = 4.2984e-04
Loss = 1.0906e-02, PNorm = 159.6069, GNorm = 0.2158, lr_0 = 4.2954e-04
Loss = 9.6094e-03, PNorm = 159.6310, GNorm = 0.3540, lr_0 = 4.2925e-04
Loss = 9.7913e-03, PNorm = 159.6527, GNorm = 0.2044, lr_0 = 4.2895e-04
Loss = 9.1624e-03, PNorm = 159.6744, GNorm = 0.1570, lr_0 = 4.2866e-04
Loss = 1.0381e-02, PNorm = 159.6940, GNorm = 0.2748, lr_0 = 4.2837e-04
Loss = 1.1047e-02, PNorm = 159.7104, GNorm = 0.5894, lr_0 = 4.2807e-04
Loss = 1.0912e-02, PNorm = 159.7320, GNorm = 0.2430, lr_0 = 4.2778e-04
Loss = 1.1962e-02, PNorm = 159.7503, GNorm = 0.5932, lr_0 = 4.2749e-04
Loss = 9.7783e-03, PNorm = 159.7717, GNorm = 0.3728, lr_0 = 4.2719e-04
Loss = 9.0626e-03, PNorm = 159.7930, GNorm = 0.1916, lr_0 = 4.2690e-04
Loss = 1.3578e-02, PNorm = 159.8184, GNorm = 0.3328, lr_0 = 4.2661e-04
Loss = 1.0200e-02, PNorm = 159.8463, GNorm = 0.2621, lr_0 = 4.2632e-04
Loss = 1.0080e-02, PNorm = 159.8671, GNorm = 0.3117, lr_0 = 4.2602e-04
Loss = 1.1381e-02, PNorm = 159.8882, GNorm = 0.4269, lr_0 = 4.2573e-04
Loss = 9.6746e-03, PNorm = 159.9061, GNorm = 0.4686, lr_0 = 4.2544e-04
Loss = 1.2760e-02, PNorm = 159.9261, GNorm = 0.1993, lr_0 = 4.2515e-04
Loss = 9.4128e-03, PNorm = 159.9530, GNorm = 0.2815, lr_0 = 4.2486e-04
Loss = 1.1742e-02, PNorm = 159.9800, GNorm = 0.7966, lr_0 = 4.2457e-04
Loss = 1.0818e-02, PNorm = 160.0035, GNorm = 0.4503, lr_0 = 4.2428e-04
Loss = 1.0355e-02, PNorm = 160.0198, GNorm = 0.3654, lr_0 = 4.2399e-04
Loss = 1.2036e-02, PNorm = 160.0400, GNorm = 0.3251, lr_0 = 4.2370e-04
Loss = 1.0404e-02, PNorm = 160.0628, GNorm = 0.2442, lr_0 = 4.2340e-04
Loss = 1.0730e-02, PNorm = 160.0867, GNorm = 0.4394, lr_0 = 4.2311e-04
Loss = 1.0384e-02, PNorm = 160.1088, GNorm = 0.2145, lr_0 = 4.2283e-04
Loss = 9.7723e-03, PNorm = 160.1312, GNorm = 0.3905, lr_0 = 4.2254e-04
Loss = 1.0550e-02, PNorm = 160.1586, GNorm = 0.2533, lr_0 = 4.2225e-04
Loss = 1.0248e-02, PNorm = 160.1853, GNorm = 0.2730, lr_0 = 4.2196e-04
Loss = 9.3286e-03, PNorm = 160.2089, GNorm = 0.1235, lr_0 = 4.2167e-04
Loss = 8.7463e-03, PNorm = 160.2329, GNorm = 0.4649, lr_0 = 4.2138e-04
Loss = 1.0880e-02, PNorm = 160.2586, GNorm = 0.2612, lr_0 = 4.2109e-04
Loss = 8.7349e-03, PNorm = 160.2827, GNorm = 0.3020, lr_0 = 4.2080e-04
Loss = 1.0819e-02, PNorm = 160.3048, GNorm = 0.2029, lr_0 = 4.2051e-04
Loss = 9.3746e-03, PNorm = 160.3267, GNorm = 0.1234, lr_0 = 4.2023e-04
Loss = 8.1427e-03, PNorm = 160.3476, GNorm = 0.3540, lr_0 = 4.1994e-04
Loss = 9.7999e-03, PNorm = 160.3678, GNorm = 0.1735, lr_0 = 4.1965e-04
Loss = 1.1203e-02, PNorm = 160.3876, GNorm = 0.2137, lr_0 = 4.1936e-04
Loss = 1.0393e-02, PNorm = 160.4130, GNorm = 0.1648, lr_0 = 4.1907e-04
Loss = 9.9599e-03, PNorm = 160.4379, GNorm = 0.1124, lr_0 = 4.1879e-04
Loss = 9.4371e-03, PNorm = 160.4595, GNorm = 0.1989, lr_0 = 4.1850e-04
Loss = 1.0188e-02, PNorm = 160.4831, GNorm = 0.2267, lr_0 = 4.1821e-04
Loss = 1.0233e-02, PNorm = 160.5048, GNorm = 0.1658, lr_0 = 4.1793e-04
Loss = 1.1655e-02, PNorm = 160.5214, GNorm = 0.3313, lr_0 = 4.1764e-04
Loss = 9.9952e-03, PNorm = 160.5444, GNorm = 0.2377, lr_0 = 4.1736e-04
Loss = 9.5909e-03, PNorm = 160.5714, GNorm = 0.2586, lr_0 = 4.1707e-04
Loss = 1.1155e-02, PNorm = 160.5933, GNorm = 0.2416, lr_0 = 4.1678e-04
Loss = 1.0122e-02, PNorm = 160.6208, GNorm = 0.1194, lr_0 = 4.1650e-04
Loss = 8.2951e-03, PNorm = 160.6474, GNorm = 0.4767, lr_0 = 4.1621e-04
Loss = 9.8310e-03, PNorm = 160.6718, GNorm = 0.3133, lr_0 = 4.1593e-04
Loss = 1.0111e-02, PNorm = 160.6953, GNorm = 0.2115, lr_0 = 4.1564e-04
Loss = 9.5578e-03, PNorm = 160.7210, GNorm = 0.2754, lr_0 = 4.1536e-04
Loss = 1.0242e-02, PNorm = 160.7423, GNorm = 0.1181, lr_0 = 4.1507e-04
Loss = 1.1160e-02, PNorm = 160.7601, GNorm = 0.1675, lr_0 = 4.1479e-04
Loss = 9.6003e-03, PNorm = 160.7773, GNorm = 0.1494, lr_0 = 4.1450e-04
Loss = 1.3852e-02, PNorm = 160.7965, GNorm = 0.2796, lr_0 = 4.1422e-04
Loss = 9.7117e-03, PNorm = 160.8204, GNorm = 0.2530, lr_0 = 4.1394e-04
Loss = 8.2756e-03, PNorm = 160.8431, GNorm = 0.2906, lr_0 = 4.1365e-04
Loss = 1.2662e-02, PNorm = 160.8673, GNorm = 0.2862, lr_0 = 4.1337e-04
Loss = 8.4725e-03, PNorm = 160.8908, GNorm = 0.3916, lr_0 = 4.1309e-04
Loss = 1.2054e-02, PNorm = 160.9138, GNorm = 0.0944, lr_0 = 4.1280e-04
Loss = 9.0259e-03, PNorm = 160.9384, GNorm = 0.1612, lr_0 = 4.1252e-04
Loss = 1.0929e-02, PNorm = 160.9593, GNorm = 0.2616, lr_0 = 4.1224e-04
Loss = 9.7597e-03, PNorm = 160.9795, GNorm = 0.2710, lr_0 = 4.1196e-04
Loss = 1.1811e-02, PNorm = 161.0043, GNorm = 0.4433, lr_0 = 4.1167e-04
Loss = 1.1159e-02, PNorm = 161.0301, GNorm = 0.1719, lr_0 = 4.1139e-04
Loss = 8.8310e-03, PNorm = 161.0576, GNorm = 0.4710, lr_0 = 4.1111e-04
Loss = 1.0277e-02, PNorm = 161.0870, GNorm = 0.1910, lr_0 = 4.1083e-04
Loss = 9.5093e-03, PNorm = 161.1130, GNorm = 0.2733, lr_0 = 4.1055e-04
Loss = 1.6651e-02, PNorm = 161.1354, GNorm = 0.7850, lr_0 = 4.1027e-04
Loss = 9.8801e-03, PNorm = 161.1580, GNorm = 0.3613, lr_0 = 4.0998e-04
Loss = 1.0354e-02, PNorm = 161.1805, GNorm = 0.3202, lr_0 = 4.0970e-04
Loss = 9.3746e-03, PNorm = 161.2052, GNorm = 0.1643, lr_0 = 4.0942e-04
Loss = 1.0089e-02, PNorm = 161.2304, GNorm = 0.4586, lr_0 = 4.0914e-04
Loss = 1.1262e-02, PNorm = 161.2545, GNorm = 0.1912, lr_0 = 4.0886e-04
Loss = 1.1188e-02, PNorm = 161.2758, GNorm = 0.3218, lr_0 = 4.0858e-04
Loss = 1.1224e-02, PNorm = 161.2978, GNorm = 0.2797, lr_0 = 4.0830e-04
Loss = 1.1811e-02, PNorm = 161.3198, GNorm = 0.1523, lr_0 = 4.0802e-04
Loss = 9.7605e-03, PNorm = 161.3426, GNorm = 0.1230, lr_0 = 4.0774e-04
Loss = 9.2791e-03, PNorm = 161.3643, GNorm = 0.3744, lr_0 = 4.0746e-04
Loss = 1.0426e-02, PNorm = 161.3884, GNorm = 0.3022, lr_0 = 4.0718e-04
Loss = 1.0637e-02, PNorm = 161.4137, GNorm = 0.4439, lr_0 = 4.0691e-04
Loss = 1.1474e-02, PNorm = 161.4381, GNorm = 0.3417, lr_0 = 4.0663e-04
Loss = 1.0452e-02, PNorm = 161.4634, GNorm = 0.1242, lr_0 = 4.0635e-04
Loss = 9.2705e-03, PNorm = 161.4864, GNorm = 0.2762, lr_0 = 4.0607e-04
Loss = 1.1554e-02, PNorm = 161.5105, GNorm = 0.2872, lr_0 = 4.0579e-04
Loss = 1.1685e-02, PNorm = 161.5303, GNorm = 0.2882, lr_0 = 4.0551e-04
Loss = 1.1645e-02, PNorm = 161.5522, GNorm = 0.1749, lr_0 = 4.0524e-04
Loss = 9.1722e-03, PNorm = 161.5770, GNorm = 0.2280, lr_0 = 4.0496e-04
Loss = 9.5223e-03, PNorm = 161.5992, GNorm = 0.1309, lr_0 = 4.0468e-04
Validation mae = 0.279775
Epoch 13
Loss = 8.6632e-03, PNorm = 161.6157, GNorm = 0.0956, lr_0 = 4.0440e-04
Loss = 1.0281e-02, PNorm = 161.6290, GNorm = 0.2997, lr_0 = 4.0413e-04
Loss = 8.0563e-03, PNorm = 161.6435, GNorm = 0.2047, lr_0 = 4.0385e-04
Loss = 9.1384e-03, PNorm = 161.6569, GNorm = 0.3920, lr_0 = 4.0357e-04
Loss = 1.0994e-02, PNorm = 161.6764, GNorm = 0.1524, lr_0 = 4.0330e-04
Loss = 9.8396e-03, PNorm = 161.6925, GNorm = 0.0972, lr_0 = 4.0302e-04
Loss = 9.6122e-03, PNorm = 161.7138, GNorm = 0.2951, lr_0 = 4.0274e-04
Loss = 1.0424e-02, PNorm = 161.7324, GNorm = 0.1460, lr_0 = 4.0247e-04
Loss = 8.3089e-03, PNorm = 161.7519, GNorm = 0.1819, lr_0 = 4.0219e-04
Loss = 1.1278e-02, PNorm = 161.7703, GNorm = 0.2331, lr_0 = 4.0192e-04
Loss = 8.8226e-03, PNorm = 161.7884, GNorm = 0.2621, lr_0 = 4.0164e-04
Loss = 8.9466e-03, PNorm = 161.8034, GNorm = 0.2268, lr_0 = 4.0137e-04
Loss = 1.1949e-02, PNorm = 161.8199, GNorm = 0.2524, lr_0 = 4.0109e-04
Loss = 1.0026e-02, PNorm = 161.8389, GNorm = 0.3213, lr_0 = 4.0082e-04
Loss = 8.9250e-03, PNorm = 161.8554, GNorm = 0.1615, lr_0 = 4.0054e-04
Loss = 8.9246e-03, PNorm = 161.8697, GNorm = 0.1718, lr_0 = 4.0027e-04
Loss = 7.6326e-03, PNorm = 161.8845, GNorm = 0.3011, lr_0 = 3.9999e-04
Loss = 9.0071e-03, PNorm = 161.9015, GNorm = 0.1114, lr_0 = 3.9972e-04
Loss = 1.0584e-02, PNorm = 161.9184, GNorm = 0.2267, lr_0 = 3.9945e-04
Loss = 7.8916e-03, PNorm = 161.9366, GNorm = 0.3692, lr_0 = 3.9917e-04
Loss = 1.0203e-02, PNorm = 161.9510, GNorm = 0.3526, lr_0 = 3.9890e-04
Loss = 8.6347e-03, PNorm = 161.9688, GNorm = 0.6050, lr_0 = 3.9863e-04
Loss = 8.3616e-03, PNorm = 161.9867, GNorm = 0.3221, lr_0 = 3.9835e-04
Loss = 7.1983e-03, PNorm = 161.9993, GNorm = 0.4347, lr_0 = 3.9808e-04
Loss = 8.3571e-03, PNorm = 162.0156, GNorm = 0.1236, lr_0 = 3.9781e-04
Loss = 8.3910e-03, PNorm = 162.0327, GNorm = 0.1673, lr_0 = 3.9753e-04
Loss = 8.3643e-03, PNorm = 162.0478, GNorm = 0.2663, lr_0 = 3.9726e-04
Loss = 7.2266e-03, PNorm = 162.0650, GNorm = 0.3535, lr_0 = 3.9699e-04
Loss = 7.9402e-03, PNorm = 162.0828, GNorm = 0.1248, lr_0 = 3.9672e-04
Loss = 9.8785e-03, PNorm = 162.1005, GNorm = 0.2681, lr_0 = 3.9645e-04
Loss = 9.0184e-03, PNorm = 162.1173, GNorm = 0.5691, lr_0 = 3.9617e-04
Loss = 8.7466e-03, PNorm = 162.1338, GNorm = 0.2744, lr_0 = 3.9590e-04
Loss = 8.0240e-03, PNorm = 162.1523, GNorm = 0.1610, lr_0 = 3.9563e-04
Loss = 7.5068e-03, PNorm = 162.1711, GNorm = 0.2855, lr_0 = 3.9536e-04
Loss = 7.7403e-03, PNorm = 162.1895, GNorm = 0.3575, lr_0 = 3.9509e-04
Loss = 1.0466e-02, PNorm = 162.2104, GNorm = 0.2239, lr_0 = 3.9482e-04
Loss = 7.2053e-03, PNorm = 162.2322, GNorm = 0.1771, lr_0 = 3.9455e-04
Loss = 7.9955e-03, PNorm = 162.2504, GNorm = 0.0942, lr_0 = 3.9428e-04
Loss = 8.7208e-03, PNorm = 162.2649, GNorm = 0.2651, lr_0 = 3.9401e-04
Loss = 7.7015e-03, PNorm = 162.2800, GNorm = 0.1217, lr_0 = 3.9374e-04
Loss = 7.0839e-03, PNorm = 162.2976, GNorm = 0.1680, lr_0 = 3.9347e-04
Loss = 8.6149e-03, PNorm = 162.3194, GNorm = 0.2599, lr_0 = 3.9320e-04
Loss = 6.7440e-03, PNorm = 162.3378, GNorm = 0.2652, lr_0 = 3.9293e-04
Loss = 8.9385e-03, PNorm = 162.3547, GNorm = 0.3151, lr_0 = 3.9266e-04
Loss = 7.2090e-03, PNorm = 162.3699, GNorm = 0.1227, lr_0 = 3.9239e-04
Loss = 7.2127e-03, PNorm = 162.3868, GNorm = 0.2356, lr_0 = 3.9212e-04
Loss = 6.5044e-03, PNorm = 162.4003, GNorm = 0.2894, lr_0 = 3.9185e-04
Loss = 8.3019e-03, PNorm = 162.4158, GNorm = 0.1990, lr_0 = 3.9159e-04
Loss = 8.1419e-03, PNorm = 162.4329, GNorm = 0.2841, lr_0 = 3.9132e-04
Loss = 8.1535e-03, PNorm = 162.4507, GNorm = 0.2273, lr_0 = 3.9105e-04
Loss = 1.0060e-02, PNorm = 162.4680, GNorm = 0.1931, lr_0 = 3.9078e-04
Loss = 6.9462e-03, PNorm = 162.4864, GNorm = 0.2510, lr_0 = 3.9051e-04
Loss = 7.3539e-03, PNorm = 162.5019, GNorm = 0.1104, lr_0 = 3.9025e-04
Loss = 8.6152e-03, PNorm = 162.5167, GNorm = 0.4125, lr_0 = 3.8998e-04
Loss = 7.2891e-03, PNorm = 162.5361, GNorm = 0.1873, lr_0 = 3.8971e-04
Loss = 7.2744e-03, PNorm = 162.5513, GNorm = 0.3218, lr_0 = 3.8945e-04
Loss = 7.3775e-03, PNorm = 162.5705, GNorm = 0.2385, lr_0 = 3.8918e-04
Loss = 9.4736e-03, PNorm = 162.5881, GNorm = 0.1254, lr_0 = 3.8891e-04
Loss = 6.8566e-03, PNorm = 162.6060, GNorm = 0.2444, lr_0 = 3.8865e-04
Loss = 8.7265e-03, PNorm = 162.6226, GNorm = 0.2852, lr_0 = 3.8838e-04
Loss = 9.0554e-03, PNorm = 162.6411, GNorm = 0.2931, lr_0 = 3.8811e-04
Loss = 7.7155e-03, PNorm = 162.6589, GNorm = 0.0951, lr_0 = 3.8785e-04
Loss = 8.9451e-03, PNorm = 162.6741, GNorm = 0.2225, lr_0 = 3.8758e-04
Loss = 1.0010e-02, PNorm = 162.6968, GNorm = 0.6397, lr_0 = 3.8732e-04
Loss = 9.0511e-03, PNorm = 162.7183, GNorm = 0.3304, lr_0 = 3.8705e-04
Loss = 8.9618e-03, PNorm = 162.7374, GNorm = 0.2810, lr_0 = 3.8679e-04
Loss = 6.6997e-03, PNorm = 162.7571, GNorm = 0.1612, lr_0 = 3.8652e-04
Loss = 8.0669e-03, PNorm = 162.7753, GNorm = 0.3091, lr_0 = 3.8626e-04
Loss = 8.9614e-03, PNorm = 162.7942, GNorm = 0.3258, lr_0 = 3.8599e-04
Loss = 7.2222e-03, PNorm = 162.8121, GNorm = 0.1232, lr_0 = 3.8573e-04
Loss = 1.1445e-02, PNorm = 162.8260, GNorm = 0.4751, lr_0 = 3.8546e-04
Loss = 6.9075e-03, PNorm = 162.8418, GNorm = 0.2589, lr_0 = 3.8520e-04
Loss = 8.4014e-03, PNorm = 162.8586, GNorm = 0.1044, lr_0 = 3.8493e-04
Loss = 8.7821e-03, PNorm = 162.8797, GNorm = 0.2777, lr_0 = 3.8467e-04
Loss = 8.4889e-03, PNorm = 162.8959, GNorm = 0.2438, lr_0 = 3.8441e-04
Loss = 6.9512e-03, PNorm = 162.9132, GNorm = 0.2077, lr_0 = 3.8414e-04
Loss = 7.8903e-03, PNorm = 162.9306, GNorm = 0.3154, lr_0 = 3.8388e-04
Loss = 7.8725e-03, PNorm = 162.9493, GNorm = 0.1809, lr_0 = 3.8362e-04
Loss = 1.0119e-02, PNorm = 162.9728, GNorm = 0.3167, lr_0 = 3.8336e-04
Loss = 7.3661e-03, PNorm = 162.9944, GNorm = 0.2235, lr_0 = 3.8309e-04
Loss = 8.7807e-03, PNorm = 163.0134, GNorm = 0.3124, lr_0 = 3.8283e-04
Loss = 8.4300e-03, PNorm = 163.0329, GNorm = 0.1468, lr_0 = 3.8257e-04
Loss = 7.1196e-03, PNorm = 163.0527, GNorm = 0.2793, lr_0 = 3.8231e-04
Loss = 9.0956e-03, PNorm = 163.0666, GNorm = 0.1572, lr_0 = 3.8204e-04
Loss = 1.0319e-02, PNorm = 163.0807, GNorm = 0.6132, lr_0 = 3.8178e-04
Loss = 9.2872e-03, PNorm = 163.1003, GNorm = 0.1301, lr_0 = 3.8152e-04
Loss = 1.2472e-02, PNorm = 163.1207, GNorm = 0.8933, lr_0 = 3.8126e-04
Loss = 7.7209e-03, PNorm = 163.1434, GNorm = 0.5666, lr_0 = 3.8100e-04
Loss = 8.5798e-03, PNorm = 163.1624, GNorm = 0.3234, lr_0 = 3.8074e-04
Loss = 8.0236e-03, PNorm = 163.1829, GNorm = 0.2181, lr_0 = 3.8048e-04
Loss = 7.7933e-03, PNorm = 163.2062, GNorm = 0.0923, lr_0 = 3.8022e-04
Loss = 7.5769e-03, PNorm = 163.2244, GNorm = 0.1285, lr_0 = 3.7995e-04
Loss = 1.0292e-02, PNorm = 163.2396, GNorm = 0.3048, lr_0 = 3.7969e-04
Loss = 9.3870e-03, PNorm = 163.2580, GNorm = 0.4195, lr_0 = 3.7943e-04
Loss = 7.7675e-03, PNorm = 163.2783, GNorm = 0.4079, lr_0 = 3.7917e-04
Loss = 9.7479e-03, PNorm = 163.3042, GNorm = 0.2548, lr_0 = 3.7891e-04
Loss = 1.1786e-02, PNorm = 163.3233, GNorm = 0.2488, lr_0 = 3.7866e-04
Loss = 9.8933e-03, PNorm = 163.3446, GNorm = 0.2268, lr_0 = 3.7840e-04
Loss = 9.0920e-03, PNorm = 163.3634, GNorm = 0.2846, lr_0 = 3.7814e-04
Loss = 7.5144e-03, PNorm = 163.3821, GNorm = 0.3702, lr_0 = 3.7788e-04
Loss = 8.7665e-03, PNorm = 163.4007, GNorm = 0.2457, lr_0 = 3.7762e-04
Loss = 7.7183e-03, PNorm = 163.4167, GNorm = 0.2614, lr_0 = 3.7736e-04
Loss = 1.2828e-02, PNorm = 163.4371, GNorm = 0.5698, lr_0 = 3.7710e-04
Loss = 8.4524e-03, PNorm = 163.4583, GNorm = 0.2471, lr_0 = 3.7684e-04
Loss = 9.6016e-03, PNorm = 163.4773, GNorm = 0.1332, lr_0 = 3.7659e-04
Loss = 8.1478e-03, PNorm = 163.4985, GNorm = 0.2913, lr_0 = 3.7633e-04
Loss = 7.8587e-03, PNorm = 163.5174, GNorm = 0.1580, lr_0 = 3.7607e-04
Loss = 1.0320e-02, PNorm = 163.5363, GNorm = 0.2259, lr_0 = 3.7581e-04
Loss = 6.6272e-03, PNorm = 163.5536, GNorm = 0.1212, lr_0 = 3.7555e-04
Loss = 6.9190e-03, PNorm = 163.5715, GNorm = 0.0923, lr_0 = 3.7530e-04
Loss = 9.2523e-03, PNorm = 163.5875, GNorm = 0.2697, lr_0 = 3.7504e-04
Loss = 8.4367e-03, PNorm = 163.6070, GNorm = 0.4413, lr_0 = 3.7478e-04
Loss = 8.0471e-03, PNorm = 163.6274, GNorm = 0.2941, lr_0 = 3.7453e-04
Loss = 8.4251e-03, PNorm = 163.6477, GNorm = 0.4334, lr_0 = 3.7427e-04
Loss = 8.3926e-03, PNorm = 163.6662, GNorm = 0.2198, lr_0 = 3.7401e-04
Loss = 9.0142e-03, PNorm = 163.6867, GNorm = 0.1623, lr_0 = 3.7376e-04
Loss = 9.5053e-03, PNorm = 163.7074, GNorm = 0.2029, lr_0 = 3.7350e-04
Loss = 8.3793e-03, PNorm = 163.7281, GNorm = 0.5573, lr_0 = 3.7325e-04
Loss = 9.2173e-03, PNorm = 163.7490, GNorm = 0.2287, lr_0 = 3.7299e-04
Loss = 9.2865e-03, PNorm = 163.7709, GNorm = 0.1514, lr_0 = 3.7273e-04
Validation mae = 0.279003
Epoch 14
Loss = 9.3423e-03, PNorm = 163.7840, GNorm = 0.6710, lr_0 = 3.7248e-04
Loss = 8.0110e-03, PNorm = 163.8006, GNorm = 0.2998, lr_0 = 3.7222e-04
Loss = 7.7846e-03, PNorm = 163.8127, GNorm = 0.4827, lr_0 = 3.7197e-04
Loss = 6.5220e-03, PNorm = 163.8243, GNorm = 0.1237, lr_0 = 3.7171e-04
Loss = 7.9496e-03, PNorm = 163.8389, GNorm = 0.1661, lr_0 = 3.7146e-04
Loss = 9.6125e-03, PNorm = 163.8565, GNorm = 0.2126, lr_0 = 3.7120e-04
Loss = 6.9688e-03, PNorm = 163.8724, GNorm = 0.2607, lr_0 = 3.7095e-04
Loss = 7.5651e-03, PNorm = 163.8904, GNorm = 0.2421, lr_0 = 3.7070e-04
Loss = 8.2944e-03, PNorm = 163.9043, GNorm = 0.1693, lr_0 = 3.7044e-04
Loss = 6.2405e-03, PNorm = 163.9195, GNorm = 0.1344, lr_0 = 3.7019e-04
Loss = 7.4533e-03, PNorm = 163.9331, GNorm = 0.2493, lr_0 = 3.6993e-04
Loss = 7.9291e-03, PNorm = 163.9440, GNorm = 0.1288, lr_0 = 3.6968e-04
Loss = 5.8449e-03, PNorm = 163.9536, GNorm = 0.1421, lr_0 = 3.6943e-04
Loss = 7.6307e-03, PNorm = 163.9668, GNorm = 0.1295, lr_0 = 3.6917e-04
Loss = 6.8739e-03, PNorm = 163.9811, GNorm = 0.1941, lr_0 = 3.6892e-04
Loss = 7.6543e-03, PNorm = 163.9959, GNorm = 0.4298, lr_0 = 3.6867e-04
Loss = 6.7411e-03, PNorm = 164.0058, GNorm = 0.3188, lr_0 = 3.6842e-04
Loss = 8.6559e-03, PNorm = 164.0213, GNorm = 0.1483, lr_0 = 3.6816e-04
Loss = 6.3316e-03, PNorm = 164.0395, GNorm = 0.2427, lr_0 = 3.6791e-04
Loss = 7.0571e-03, PNorm = 164.0542, GNorm = 0.1502, lr_0 = 3.6766e-04
Loss = 7.6515e-03, PNorm = 164.0678, GNorm = 0.2636, lr_0 = 3.6741e-04
Loss = 7.8921e-03, PNorm = 164.0814, GNorm = 0.2957, lr_0 = 3.6716e-04
Loss = 6.4822e-03, PNorm = 164.0971, GNorm = 0.2753, lr_0 = 3.6690e-04
Loss = 7.9471e-03, PNorm = 164.1126, GNorm = 0.2901, lr_0 = 3.6665e-04
Loss = 7.2823e-03, PNorm = 164.1302, GNorm = 0.2355, lr_0 = 3.6640e-04
Loss = 7.0220e-03, PNorm = 164.1437, GNorm = 0.3235, lr_0 = 3.6615e-04
Loss = 8.1061e-03, PNorm = 164.1615, GNorm = 0.1613, lr_0 = 3.6590e-04
Loss = 6.6962e-03, PNorm = 164.1740, GNorm = 0.1829, lr_0 = 3.6565e-04
Loss = 7.1158e-03, PNorm = 164.1912, GNorm = 0.3052, lr_0 = 3.6540e-04
Loss = 7.9736e-03, PNorm = 164.2041, GNorm = 0.2259, lr_0 = 3.6515e-04
Loss = 7.4955e-03, PNorm = 164.2175, GNorm = 0.1315, lr_0 = 3.6490e-04
Loss = 6.7363e-03, PNorm = 164.2293, GNorm = 0.0746, lr_0 = 3.6465e-04
Loss = 7.4895e-03, PNorm = 164.2426, GNorm = 0.3072, lr_0 = 3.6440e-04
Loss = 6.4200e-03, PNorm = 164.2568, GNorm = 0.1933, lr_0 = 3.6415e-04
Loss = 7.1548e-03, PNorm = 164.2734, GNorm = 0.0964, lr_0 = 3.6390e-04
Loss = 6.3834e-03, PNorm = 164.2888, GNorm = 0.4744, lr_0 = 3.6365e-04
Loss = 7.8169e-03, PNorm = 164.3046, GNorm = 0.2120, lr_0 = 3.6340e-04
Loss = 6.5798e-03, PNorm = 164.3227, GNorm = 0.0948, lr_0 = 3.6315e-04
Loss = 6.1014e-03, PNorm = 164.3392, GNorm = 0.1098, lr_0 = 3.6290e-04
Loss = 5.9061e-03, PNorm = 164.3531, GNorm = 0.2751, lr_0 = 3.6266e-04
Loss = 5.8797e-03, PNorm = 164.3679, GNorm = 0.3385, lr_0 = 3.6241e-04
Loss = 7.3589e-03, PNorm = 164.3824, GNorm = 0.4833, lr_0 = 3.6216e-04
Loss = 6.6363e-03, PNorm = 164.3949, GNorm = 0.1439, lr_0 = 3.6191e-04
Loss = 6.8189e-03, PNorm = 164.4078, GNorm = 0.2899, lr_0 = 3.6166e-04
Loss = 6.8448e-03, PNorm = 164.4230, GNorm = 0.1455, lr_0 = 3.6141e-04
Loss = 6.7932e-03, PNorm = 164.4377, GNorm = 0.1097, lr_0 = 3.6117e-04
Loss = 7.2546e-03, PNorm = 164.4530, GNorm = 0.1889, lr_0 = 3.6092e-04
Loss = 9.8079e-03, PNorm = 164.4660, GNorm = 0.4995, lr_0 = 3.6067e-04
Loss = 6.6768e-03, PNorm = 164.4798, GNorm = 0.1225, lr_0 = 3.6043e-04
Loss = 7.5110e-03, PNorm = 164.4971, GNorm = 0.3489, lr_0 = 3.6018e-04
Loss = 6.0276e-03, PNorm = 164.5126, GNorm = 0.4083, lr_0 = 3.5993e-04
Loss = 9.0590e-03, PNorm = 164.5292, GNorm = 0.2506, lr_0 = 3.5969e-04
Loss = 9.9889e-03, PNorm = 164.5486, GNorm = 0.3325, lr_0 = 3.5944e-04
Loss = 5.9936e-03, PNorm = 164.5628, GNorm = 0.2791, lr_0 = 3.5919e-04
Loss = 7.9828e-03, PNorm = 164.5754, GNorm = 0.2732, lr_0 = 3.5895e-04
Loss = 6.8485e-03, PNorm = 164.5908, GNorm = 0.2490, lr_0 = 3.5870e-04
Loss = 6.4969e-03, PNorm = 164.6061, GNorm = 0.4103, lr_0 = 3.5845e-04
Loss = 6.7449e-03, PNorm = 164.6233, GNorm = 0.2638, lr_0 = 3.5821e-04
Loss = 6.5729e-03, PNorm = 164.6412, GNorm = 0.2606, lr_0 = 3.5796e-04
Loss = 7.5780e-03, PNorm = 164.6588, GNorm = 0.4296, lr_0 = 3.5772e-04
Loss = 7.1001e-03, PNorm = 164.6744, GNorm = 0.2331, lr_0 = 3.5747e-04
Loss = 5.8573e-03, PNorm = 164.6892, GNorm = 0.1522, lr_0 = 3.5723e-04
Loss = 7.6061e-03, PNorm = 164.7043, GNorm = 0.2025, lr_0 = 3.5698e-04
Loss = 7.1796e-03, PNorm = 164.7184, GNorm = 0.2541, lr_0 = 3.5674e-04
Loss = 6.3355e-03, PNorm = 164.7308, GNorm = 0.2835, lr_0 = 3.5650e-04
Loss = 7.4999e-03, PNorm = 164.7433, GNorm = 0.1944, lr_0 = 3.5625e-04
Loss = 7.8015e-03, PNorm = 164.7596, GNorm = 0.2100, lr_0 = 3.5601e-04
Loss = 6.2330e-03, PNorm = 164.7765, GNorm = 0.2047, lr_0 = 3.5576e-04
Loss = 5.9344e-03, PNorm = 164.7902, GNorm = 0.2708, lr_0 = 3.5552e-04
Loss = 7.2078e-03, PNorm = 164.8032, GNorm = 0.3808, lr_0 = 3.5528e-04
Loss = 6.3699e-03, PNorm = 164.8179, GNorm = 0.1917, lr_0 = 3.5503e-04
Loss = 7.7236e-03, PNorm = 164.8312, GNorm = 0.2213, lr_0 = 3.5479e-04
Loss = 6.1044e-03, PNorm = 164.8470, GNorm = 0.3050, lr_0 = 3.5455e-04
Loss = 8.1058e-03, PNorm = 164.8618, GNorm = 0.3811, lr_0 = 3.5430e-04
Loss = 8.4893e-03, PNorm = 164.8808, GNorm = 0.3639, lr_0 = 3.5406e-04
Loss = 6.9494e-03, PNorm = 164.9013, GNorm = 0.1921, lr_0 = 3.5382e-04
Loss = 8.5858e-03, PNorm = 164.9212, GNorm = 0.2668, lr_0 = 3.5358e-04
Loss = 7.9381e-03, PNorm = 164.9356, GNorm = 0.1252, lr_0 = 3.5333e-04
Loss = 5.9171e-03, PNorm = 164.9488, GNorm = 0.1192, lr_0 = 3.5309e-04
Loss = 6.2932e-03, PNorm = 164.9610, GNorm = 0.2126, lr_0 = 3.5285e-04
Loss = 6.8219e-03, PNorm = 164.9753, GNorm = 0.4408, lr_0 = 3.5261e-04
Loss = 8.7851e-03, PNorm = 164.9880, GNorm = 0.3451, lr_0 = 3.5237e-04
Loss = 5.5406e-03, PNorm = 165.0029, GNorm = 0.1903, lr_0 = 3.5212e-04
Loss = 5.9408e-03, PNorm = 165.0213, GNorm = 0.1780, lr_0 = 3.5188e-04
Loss = 6.9833e-03, PNorm = 165.0351, GNorm = 0.3222, lr_0 = 3.5164e-04
Loss = 6.1761e-03, PNorm = 165.0526, GNorm = 0.2681, lr_0 = 3.5140e-04
Loss = 6.6455e-03, PNorm = 165.0676, GNorm = 0.2739, lr_0 = 3.5116e-04
Loss = 5.3385e-03, PNorm = 165.0824, GNorm = 0.4637, lr_0 = 3.5092e-04
Loss = 6.2461e-03, PNorm = 165.0955, GNorm = 0.1789, lr_0 = 3.5068e-04
Loss = 7.7697e-03, PNorm = 165.1072, GNorm = 0.2497, lr_0 = 3.5044e-04
Loss = 7.9453e-03, PNorm = 165.1202, GNorm = 0.4292, lr_0 = 3.5020e-04
Loss = 9.6710e-03, PNorm = 165.1385, GNorm = 0.6583, lr_0 = 3.4996e-04
Loss = 6.4363e-03, PNorm = 165.1529, GNorm = 0.1277, lr_0 = 3.4972e-04
Loss = 6.4273e-03, PNorm = 165.1700, GNorm = 0.4792, lr_0 = 3.4948e-04
Loss = 5.8242e-03, PNorm = 165.1875, GNorm = 0.1728, lr_0 = 3.4924e-04
Loss = 6.3338e-03, PNorm = 165.2025, GNorm = 0.2875, lr_0 = 3.4900e-04
Loss = 5.1682e-03, PNorm = 165.2209, GNorm = 0.2717, lr_0 = 3.4876e-04
Loss = 7.4603e-03, PNorm = 165.2386, GNorm = 0.3077, lr_0 = 3.4852e-04
Loss = 6.3446e-03, PNorm = 165.2571, GNorm = 0.1973, lr_0 = 3.4828e-04
Loss = 6.8114e-03, PNorm = 165.2762, GNorm = 0.0832, lr_0 = 3.4805e-04
Loss = 1.1138e-02, PNorm = 165.2927, GNorm = 0.7996, lr_0 = 3.4781e-04
Loss = 8.2395e-03, PNorm = 165.3080, GNorm = 0.2734, lr_0 = 3.4757e-04
Loss = 8.3997e-03, PNorm = 165.3239, GNorm = 0.6188, lr_0 = 3.4733e-04
Loss = 9.2498e-03, PNorm = 165.3406, GNorm = 0.2928, lr_0 = 3.4709e-04
Loss = 7.4249e-03, PNorm = 165.3603, GNorm = 0.3288, lr_0 = 3.4686e-04
Loss = 7.3718e-03, PNorm = 165.3782, GNorm = 0.1290, lr_0 = 3.4662e-04
Loss = 7.0988e-03, PNorm = 165.3965, GNorm = 0.2166, lr_0 = 3.4638e-04
Loss = 7.9359e-03, PNorm = 165.4099, GNorm = 0.1146, lr_0 = 3.4614e-04
Loss = 6.9901e-03, PNorm = 165.4223, GNorm = 0.3270, lr_0 = 3.4591e-04
Loss = 8.8471e-03, PNorm = 165.4347, GNorm = 0.3629, lr_0 = 3.4567e-04
Loss = 7.1921e-03, PNorm = 165.4473, GNorm = 0.3815, lr_0 = 3.4543e-04
Loss = 6.2856e-03, PNorm = 165.4639, GNorm = 0.2382, lr_0 = 3.4520e-04
Loss = 6.4226e-03, PNorm = 165.4767, GNorm = 0.3378, lr_0 = 3.4496e-04
Loss = 7.4187e-03, PNorm = 165.4892, GNorm = 0.1229, lr_0 = 3.4472e-04
Loss = 1.1042e-02, PNorm = 165.5048, GNorm = 0.4352, lr_0 = 3.4449e-04
Loss = 1.2297e-02, PNorm = 165.5229, GNorm = 0.0936, lr_0 = 3.4425e-04
Loss = 6.9649e-03, PNorm = 165.5445, GNorm = 0.3052, lr_0 = 3.4402e-04
Loss = 7.6405e-03, PNorm = 165.5646, GNorm = 0.1769, lr_0 = 3.4378e-04
Loss = 7.6770e-03, PNorm = 165.5841, GNorm = 0.2413, lr_0 = 3.4354e-04
Loss = 7.3131e-03, PNorm = 165.6001, GNorm = 0.1041, lr_0 = 3.4331e-04
Validation mae = 0.279795
Epoch 15
Loss = 5.9401e-03, PNorm = 165.6112, GNorm = 0.2226, lr_0 = 3.4307e-04
Loss = 6.3607e-03, PNorm = 165.6203, GNorm = 0.2615, lr_0 = 3.4284e-04
Loss = 7.5791e-03, PNorm = 165.6306, GNorm = 0.3396, lr_0 = 3.4260e-04
Loss = 5.7643e-03, PNorm = 165.6399, GNorm = 0.3446, lr_0 = 3.4237e-04
Loss = 6.9959e-03, PNorm = 165.6524, GNorm = 0.2543, lr_0 = 3.4213e-04
Loss = 6.0101e-03, PNorm = 165.6696, GNorm = 0.0830, lr_0 = 3.4190e-04
Loss = 6.6439e-03, PNorm = 165.6833, GNorm = 0.2240, lr_0 = 3.4167e-04
Loss = 6.5260e-03, PNorm = 165.6962, GNorm = 0.1705, lr_0 = 3.4143e-04
Loss = 7.4052e-03, PNorm = 165.7102, GNorm = 0.4195, lr_0 = 3.4120e-04
Loss = 5.4931e-03, PNorm = 165.7251, GNorm = 0.1307, lr_0 = 3.4096e-04
Loss = 6.5759e-03, PNorm = 165.7386, GNorm = 0.1361, lr_0 = 3.4073e-04
Loss = 5.9157e-03, PNorm = 165.7500, GNorm = 0.1346, lr_0 = 3.4050e-04
Loss = 6.5713e-03, PNorm = 165.7615, GNorm = 0.2160, lr_0 = 3.4026e-04
Loss = 5.4402e-03, PNorm = 165.7727, GNorm = 0.2185, lr_0 = 3.4003e-04
Loss = 6.7260e-03, PNorm = 165.7798, GNorm = 0.1898, lr_0 = 3.3980e-04
Loss = 7.4053e-03, PNorm = 165.7864, GNorm = 0.1145, lr_0 = 3.3956e-04
Loss = 6.0369e-03, PNorm = 165.7995, GNorm = 0.4269, lr_0 = 3.3933e-04
Loss = 5.4141e-03, PNorm = 165.8109, GNorm = 0.1276, lr_0 = 3.3910e-04
Loss = 7.5578e-03, PNorm = 165.8257, GNorm = 0.0859, lr_0 = 3.3887e-04
Loss = 5.9753e-03, PNorm = 165.8408, GNorm = 0.3446, lr_0 = 3.3864e-04
Loss = 5.2788e-03, PNorm = 165.8547, GNorm = 0.3347, lr_0 = 3.3840e-04
Loss = 8.4133e-03, PNorm = 165.8631, GNorm = 0.1832, lr_0 = 3.3817e-04
Loss = 5.0874e-03, PNorm = 165.8734, GNorm = 0.2494, lr_0 = 3.3794e-04
Loss = 5.9925e-03, PNorm = 165.8867, GNorm = 0.2902, lr_0 = 3.3771e-04
Loss = 5.8200e-03, PNorm = 165.8990, GNorm = 0.3228, lr_0 = 3.3748e-04
Loss = 7.4567e-03, PNorm = 165.9131, GNorm = 0.2668, lr_0 = 3.3725e-04
Loss = 1.1911e-02, PNorm = 165.9242, GNorm = 0.1084, lr_0 = 3.3701e-04
Loss = 5.1424e-03, PNorm = 165.9378, GNorm = 0.2843, lr_0 = 3.3678e-04
Loss = 5.5671e-03, PNorm = 165.9513, GNorm = 0.3493, lr_0 = 3.3655e-04
Loss = 5.8495e-03, PNorm = 165.9645, GNorm = 0.1486, lr_0 = 3.3632e-04
Loss = 6.8158e-03, PNorm = 165.9762, GNorm = 0.1462, lr_0 = 3.3609e-04
Loss = 6.0464e-03, PNorm = 165.9911, GNorm = 0.2302, lr_0 = 3.3586e-04
Loss = 7.2073e-03, PNorm = 166.0046, GNorm = 0.2469, lr_0 = 3.3563e-04
Loss = 6.8160e-03, PNorm = 166.0174, GNorm = 0.1492, lr_0 = 3.3540e-04
Loss = 8.0515e-03, PNorm = 166.0326, GNorm = 0.1119, lr_0 = 3.3517e-04
Loss = 7.4540e-03, PNorm = 166.0454, GNorm = 0.2735, lr_0 = 3.3494e-04
Loss = 9.3766e-03, PNorm = 166.0588, GNorm = 0.0748, lr_0 = 3.3471e-04
Loss = 6.7282e-03, PNorm = 166.0710, GNorm = 0.1750, lr_0 = 3.3448e-04
Loss = 7.0480e-03, PNorm = 166.0832, GNorm = 0.1882, lr_0 = 3.3425e-04
Loss = 6.5037e-03, PNorm = 166.0982, GNorm = 0.0908, lr_0 = 3.3403e-04
Loss = 6.4580e-03, PNorm = 166.1122, GNorm = 0.2093, lr_0 = 3.3380e-04
Loss = 5.4338e-03, PNorm = 166.1258, GNorm = 0.2762, lr_0 = 3.3357e-04
Loss = 6.2362e-03, PNorm = 166.1378, GNorm = 0.1943, lr_0 = 3.3334e-04
Loss = 6.3900e-03, PNorm = 166.1514, GNorm = 0.3577, lr_0 = 3.3311e-04
Loss = 6.7799e-03, PNorm = 166.1681, GNorm = 0.1046, lr_0 = 3.3288e-04
Loss = 8.9115e-03, PNorm = 166.1815, GNorm = 0.5293, lr_0 = 3.3265e-04
Loss = 5.7009e-03, PNorm = 166.1921, GNorm = 0.3957, lr_0 = 3.3243e-04
Loss = 5.2748e-03, PNorm = 166.2002, GNorm = 0.1587, lr_0 = 3.3220e-04
Loss = 5.4943e-03, PNorm = 166.2100, GNorm = 0.3621, lr_0 = 3.3197e-04
Loss = 5.8616e-03, PNorm = 166.2225, GNorm = 0.2306, lr_0 = 3.3174e-04
Loss = 5.1625e-03, PNorm = 166.2368, GNorm = 0.1060, lr_0 = 3.3152e-04
Loss = 6.6379e-03, PNorm = 166.2486, GNorm = 0.3011, lr_0 = 3.3129e-04
Loss = 5.1420e-03, PNorm = 166.2595, GNorm = 0.1071, lr_0 = 3.3106e-04
Loss = 6.3545e-03, PNorm = 166.2727, GNorm = 0.0697, lr_0 = 3.3084e-04
Loss = 7.4145e-03, PNorm = 166.2836, GNorm = 0.6373, lr_0 = 3.3061e-04
Loss = 8.3809e-03, PNorm = 166.2982, GNorm = 0.3597, lr_0 = 3.3038e-04
Loss = 6.1143e-03, PNorm = 166.3100, GNorm = 0.2675, lr_0 = 3.3016e-04
Loss = 6.2295e-03, PNorm = 166.3213, GNorm = 0.1948, lr_0 = 3.2993e-04
Loss = 6.2103e-03, PNorm = 166.3354, GNorm = 0.2504, lr_0 = 3.2970e-04
Loss = 6.2449e-03, PNorm = 166.3509, GNorm = 0.1298, lr_0 = 3.2948e-04
Loss = 4.6925e-03, PNorm = 166.3654, GNorm = 0.2020, lr_0 = 3.2925e-04
Loss = 5.7767e-03, PNorm = 166.3745, GNorm = 0.4277, lr_0 = 3.2903e-04
Loss = 7.0472e-03, PNorm = 166.3863, GNorm = 0.2497, lr_0 = 3.2880e-04
Loss = 5.9860e-03, PNorm = 166.3933, GNorm = 0.2347, lr_0 = 3.2858e-04
Loss = 5.5661e-03, PNorm = 166.4045, GNorm = 0.1579, lr_0 = 3.2835e-04
Loss = 8.0610e-03, PNorm = 166.4145, GNorm = 0.4042, lr_0 = 3.2813e-04
Loss = 4.7519e-03, PNorm = 166.4269, GNorm = 0.1166, lr_0 = 3.2790e-04
Loss = 8.1071e-03, PNorm = 166.4425, GNorm = 0.4171, lr_0 = 3.2768e-04
Loss = 6.3642e-03, PNorm = 166.4593, GNorm = 0.1401, lr_0 = 3.2745e-04
Loss = 5.1934e-03, PNorm = 166.4735, GNorm = 0.1800, lr_0 = 3.2723e-04
Loss = 4.5902e-03, PNorm = 166.4851, GNorm = 0.2835, lr_0 = 3.2700e-04
Loss = 5.5386e-03, PNorm = 166.4965, GNorm = 0.1192, lr_0 = 3.2678e-04
Loss = 5.6959e-03, PNorm = 166.5119, GNorm = 0.2664, lr_0 = 3.2656e-04
Loss = 4.7827e-03, PNorm = 166.5268, GNorm = 0.1127, lr_0 = 3.2633e-04
Loss = 5.4918e-03, PNorm = 166.5394, GNorm = 0.0897, lr_0 = 3.2611e-04
Loss = 5.5092e-03, PNorm = 166.5520, GNorm = 0.1886, lr_0 = 3.2589e-04
Loss = 5.1475e-03, PNorm = 166.5643, GNorm = 0.1126, lr_0 = 3.2566e-04
Loss = 4.9952e-03, PNorm = 166.5747, GNorm = 0.1843, lr_0 = 3.2544e-04
Loss = 5.8452e-03, PNorm = 166.5855, GNorm = 0.4406, lr_0 = 3.2522e-04
Loss = 6.0691e-03, PNorm = 166.5958, GNorm = 0.1552, lr_0 = 3.2499e-04
Loss = 5.0514e-03, PNorm = 166.6083, GNorm = 0.1687, lr_0 = 3.2477e-04
Loss = 5.8221e-03, PNorm = 166.6221, GNorm = 0.3234, lr_0 = 3.2455e-04
Loss = 4.3731e-03, PNorm = 166.6350, GNorm = 0.2356, lr_0 = 3.2433e-04
Loss = 5.3112e-03, PNorm = 166.6476, GNorm = 0.1812, lr_0 = 3.2410e-04
Loss = 5.4808e-03, PNorm = 166.6581, GNorm = 0.3281, lr_0 = 3.2388e-04
Loss = 5.6434e-03, PNorm = 166.6707, GNorm = 0.1633, lr_0 = 3.2366e-04
Loss = 5.2578e-03, PNorm = 166.6836, GNorm = 0.2088, lr_0 = 3.2344e-04
Loss = 6.2379e-03, PNorm = 166.6915, GNorm = 0.1583, lr_0 = 3.2322e-04
Loss = 4.9863e-03, PNorm = 166.7003, GNorm = 0.1784, lr_0 = 3.2300e-04
Loss = 6.2724e-03, PNorm = 166.7096, GNorm = 0.1357, lr_0 = 3.2277e-04
Loss = 8.4770e-03, PNorm = 166.7242, GNorm = 0.2801, lr_0 = 3.2255e-04
Loss = 4.8893e-03, PNorm = 166.7387, GNorm = 0.1261, lr_0 = 3.2233e-04
Loss = 6.5494e-03, PNorm = 166.7533, GNorm = 0.1887, lr_0 = 3.2211e-04
Loss = 7.0723e-03, PNorm = 166.7631, GNorm = 0.2049, lr_0 = 3.2189e-04
Loss = 5.8624e-03, PNorm = 166.7734, GNorm = 0.5546, lr_0 = 3.2167e-04
Loss = 6.4572e-03, PNorm = 166.7907, GNorm = 0.0853, lr_0 = 3.2145e-04
Loss = 4.7025e-03, PNorm = 166.8085, GNorm = 0.0993, lr_0 = 3.2123e-04
Loss = 6.8822e-03, PNorm = 166.8248, GNorm = 0.2118, lr_0 = 3.2101e-04
Loss = 6.5708e-03, PNorm = 166.8398, GNorm = 0.2011, lr_0 = 3.2079e-04
Loss = 8.6207e-03, PNorm = 166.8555, GNorm = 1.3576, lr_0 = 3.2057e-04
Loss = 5.1716e-03, PNorm = 166.8676, GNorm = 0.1367, lr_0 = 3.2035e-04
Loss = 7.4610e-03, PNorm = 166.8816, GNorm = 0.1738, lr_0 = 3.2013e-04
Loss = 5.9522e-03, PNorm = 166.8932, GNorm = 0.1845, lr_0 = 3.1991e-04
Loss = 7.0383e-03, PNorm = 166.9095, GNorm = 0.1231, lr_0 = 3.1969e-04
Loss = 6.9072e-03, PNorm = 166.9246, GNorm = 0.2462, lr_0 = 3.1947e-04
Loss = 5.5598e-03, PNorm = 166.9377, GNorm = 0.1483, lr_0 = 3.1925e-04
Loss = 5.1824e-03, PNorm = 166.9517, GNorm = 0.0899, lr_0 = 3.1904e-04
Loss = 7.1106e-03, PNorm = 166.9664, GNorm = 0.1622, lr_0 = 3.1882e-04
Loss = 5.7370e-03, PNorm = 166.9805, GNorm = 0.2200, lr_0 = 3.1860e-04
Loss = 5.7312e-03, PNorm = 166.9925, GNorm = 0.4554, lr_0 = 3.1838e-04
Loss = 5.4685e-03, PNorm = 167.0055, GNorm = 0.2605, lr_0 = 3.1816e-04
Loss = 6.5673e-03, PNorm = 167.0181, GNorm = 0.2953, lr_0 = 3.1794e-04
Loss = 6.0546e-03, PNorm = 167.0306, GNorm = 0.1965, lr_0 = 3.1773e-04
Loss = 5.4996e-03, PNorm = 167.0451, GNorm = 0.1617, lr_0 = 3.1751e-04
Loss = 7.7561e-03, PNorm = 167.0608, GNorm = 0.2735, lr_0 = 3.1729e-04
Loss = 8.2038e-03, PNorm = 167.0777, GNorm = 0.4791, lr_0 = 3.1707e-04
Loss = 7.2015e-03, PNorm = 167.0929, GNorm = 0.1547, lr_0 = 3.1686e-04
Loss = 5.4885e-03, PNorm = 167.1050, GNorm = 0.1721, lr_0 = 3.1664e-04
Loss = 5.7595e-03, PNorm = 167.1152, GNorm = 0.3125, lr_0 = 3.1642e-04
Loss = 6.5431e-03, PNorm = 167.1287, GNorm = 0.2581, lr_0 = 3.1621e-04
Validation mae = 0.279051
Epoch 16
Loss = 5.8338e-03, PNorm = 167.1386, GNorm = 0.1145, lr_0 = 3.1599e-04
Loss = 4.5741e-03, PNorm = 167.1461, GNorm = 0.1910, lr_0 = 3.1577e-04
Loss = 5.3359e-03, PNorm = 167.1534, GNorm = 0.1701, lr_0 = 3.1556e-04
Loss = 6.3461e-03, PNorm = 167.1619, GNorm = 0.1775, lr_0 = 3.1534e-04
Loss = 5.2697e-03, PNorm = 167.1716, GNorm = 0.1344, lr_0 = 3.1512e-04
Loss = 6.0173e-03, PNorm = 167.1827, GNorm = 0.4203, lr_0 = 3.1491e-04
Loss = 4.2988e-03, PNorm = 167.1930, GNorm = 0.0938, lr_0 = 3.1469e-04
Loss = 4.9315e-03, PNorm = 167.2022, GNorm = 0.3481, lr_0 = 3.1448e-04
Loss = 6.7067e-03, PNorm = 167.2135, GNorm = 0.2374, lr_0 = 3.1426e-04
Loss = 4.7873e-03, PNorm = 167.2257, GNorm = 0.1007, lr_0 = 3.1405e-04
Loss = 4.9601e-03, PNorm = 167.2346, GNorm = 0.3085, lr_0 = 3.1383e-04
Loss = 4.4927e-03, PNorm = 167.2446, GNorm = 0.3052, lr_0 = 3.1362e-04
Loss = 7.3323e-03, PNorm = 167.2527, GNorm = 0.2788, lr_0 = 3.1340e-04
Loss = 5.7128e-03, PNorm = 167.2641, GNorm = 0.2097, lr_0 = 3.1319e-04
Loss = 5.0842e-03, PNorm = 167.2744, GNorm = 0.2452, lr_0 = 3.1297e-04
Loss = 4.9133e-03, PNorm = 167.2828, GNorm = 0.0860, lr_0 = 3.1276e-04
Loss = 5.3733e-03, PNorm = 167.2911, GNorm = 0.1305, lr_0 = 3.1254e-04
Loss = 5.5851e-03, PNorm = 167.3028, GNorm = 0.2142, lr_0 = 3.1233e-04
Loss = 4.6713e-03, PNorm = 167.3149, GNorm = 0.0874, lr_0 = 3.1212e-04
Loss = 4.7975e-03, PNorm = 167.3230, GNorm = 0.1681, lr_0 = 3.1190e-04
Loss = 5.7307e-03, PNorm = 167.3326, GNorm = 0.2725, lr_0 = 3.1169e-04
Loss = 4.7430e-03, PNorm = 167.3389, GNorm = 0.1746, lr_0 = 3.1147e-04
Loss = 3.8732e-03, PNorm = 167.3497, GNorm = 0.2000, lr_0 = 3.1126e-04
Loss = 5.0715e-03, PNorm = 167.3575, GNorm = 0.3189, lr_0 = 3.1105e-04
Loss = 4.4656e-03, PNorm = 167.3691, GNorm = 0.3446, lr_0 = 3.1083e-04
Loss = 4.2219e-03, PNorm = 167.3770, GNorm = 0.2211, lr_0 = 3.1062e-04
Loss = 5.0359e-03, PNorm = 167.3855, GNorm = 0.1439, lr_0 = 3.1041e-04
Loss = 4.8143e-03, PNorm = 167.3934, GNorm = 0.1068, lr_0 = 3.1020e-04
Loss = 6.1027e-03, PNorm = 167.4031, GNorm = 0.3510, lr_0 = 3.0998e-04
Loss = 4.1792e-03, PNorm = 167.4124, GNorm = 0.2309, lr_0 = 3.0977e-04
Loss = 4.3408e-03, PNorm = 167.4221, GNorm = 0.3721, lr_0 = 3.0956e-04
Loss = 5.1314e-03, PNorm = 167.4312, GNorm = 0.0979, lr_0 = 3.0935e-04
Loss = 5.0054e-03, PNorm = 167.4431, GNorm = 0.0717, lr_0 = 3.0914e-04
Loss = 4.3807e-03, PNorm = 167.4527, GNorm = 0.1535, lr_0 = 3.0892e-04
Loss = 4.7314e-03, PNorm = 167.4620, GNorm = 0.1912, lr_0 = 3.0871e-04
Loss = 4.7050e-03, PNorm = 167.4695, GNorm = 0.1165, lr_0 = 3.0850e-04
Loss = 4.1653e-03, PNorm = 167.4773, GNorm = 0.2569, lr_0 = 3.0829e-04
Loss = 4.1489e-03, PNorm = 167.4871, GNorm = 0.3296, lr_0 = 3.0808e-04
Loss = 5.3595e-03, PNorm = 167.4995, GNorm = 0.1273, lr_0 = 3.0787e-04
Loss = 4.8011e-03, PNorm = 167.5117, GNorm = 0.1915, lr_0 = 3.0766e-04
Loss = 8.7637e-03, PNorm = 167.5201, GNorm = 0.4813, lr_0 = 3.0745e-04
Loss = 6.2021e-03, PNorm = 167.5287, GNorm = 0.4478, lr_0 = 3.0723e-04
Loss = 5.1598e-03, PNorm = 167.5372, GNorm = 0.1842, lr_0 = 3.0702e-04
Loss = 5.5429e-03, PNorm = 167.5454, GNorm = 0.0990, lr_0 = 3.0681e-04
Loss = 4.6627e-03, PNorm = 167.5559, GNorm = 0.2244, lr_0 = 3.0660e-04
Loss = 5.2027e-03, PNorm = 167.5658, GNorm = 0.3764, lr_0 = 3.0639e-04
Loss = 7.3055e-03, PNorm = 167.5745, GNorm = 0.2318, lr_0 = 3.0618e-04
Loss = 5.0164e-03, PNorm = 167.5834, GNorm = 0.1007, lr_0 = 3.0597e-04
Loss = 4.1095e-03, PNorm = 167.5944, GNorm = 0.2309, lr_0 = 3.0576e-04
Loss = 5.4273e-03, PNorm = 167.6051, GNorm = 0.2010, lr_0 = 3.0555e-04
Loss = 7.3391e-03, PNorm = 167.6205, GNorm = 0.3461, lr_0 = 3.0535e-04
Loss = 5.3476e-03, PNorm = 167.6334, GNorm = 0.1743, lr_0 = 3.0514e-04
Loss = 4.8771e-03, PNorm = 167.6421, GNorm = 0.4222, lr_0 = 3.0493e-04
Loss = 4.2562e-03, PNorm = 167.6526, GNorm = 0.1551, lr_0 = 3.0472e-04
Loss = 4.7345e-03, PNorm = 167.6631, GNorm = 0.4303, lr_0 = 3.0451e-04
Loss = 5.1433e-03, PNorm = 167.6757, GNorm = 0.2187, lr_0 = 3.0430e-04
Loss = 6.4118e-03, PNorm = 167.6862, GNorm = 0.0633, lr_0 = 3.0409e-04
Loss = 5.3074e-03, PNorm = 167.6949, GNorm = 0.2552, lr_0 = 3.0388e-04
Loss = 6.4324e-03, PNorm = 167.7057, GNorm = 0.1726, lr_0 = 3.0368e-04
Loss = 5.0363e-03, PNorm = 167.7154, GNorm = 0.2606, lr_0 = 3.0347e-04
Loss = 4.6958e-03, PNorm = 167.7251, GNorm = 0.2996, lr_0 = 3.0326e-04
Loss = 4.7043e-03, PNorm = 167.7369, GNorm = 0.3143, lr_0 = 3.0305e-04
Loss = 4.6590e-03, PNorm = 167.7470, GNorm = 0.1031, lr_0 = 3.0284e-04
Loss = 4.6033e-03, PNorm = 167.7569, GNorm = 0.0799, lr_0 = 3.0264e-04
Loss = 7.8193e-03, PNorm = 167.7670, GNorm = 0.2140, lr_0 = 3.0243e-04
Loss = 4.9296e-03, PNorm = 167.7795, GNorm = 0.0615, lr_0 = 3.0222e-04
Loss = 5.4520e-03, PNorm = 167.7892, GNorm = 0.1922, lr_0 = 3.0202e-04
Loss = 4.4043e-03, PNorm = 167.8000, GNorm = 0.3510, lr_0 = 3.0181e-04
Loss = 4.5185e-03, PNorm = 167.8091, GNorm = 0.1193, lr_0 = 3.0160e-04
Loss = 4.2029e-03, PNorm = 167.8169, GNorm = 0.2627, lr_0 = 3.0140e-04
Loss = 4.1120e-03, PNorm = 167.8255, GNorm = 0.1111, lr_0 = 3.0119e-04
Loss = 5.3043e-03, PNorm = 167.8331, GNorm = 0.2107, lr_0 = 3.0098e-04
Loss = 4.8082e-03, PNorm = 167.8412, GNorm = 0.0784, lr_0 = 3.0078e-04
Loss = 4.9772e-03, PNorm = 167.8513, GNorm = 0.0791, lr_0 = 3.0057e-04
Loss = 4.9375e-03, PNorm = 167.8641, GNorm = 0.2023, lr_0 = 3.0036e-04
Loss = 4.6673e-03, PNorm = 167.8731, GNorm = 0.1819, lr_0 = 3.0016e-04
Loss = 5.6352e-03, PNorm = 167.8858, GNorm = 0.1872, lr_0 = 2.9995e-04
Loss = 4.3849e-03, PNorm = 167.8982, GNorm = 0.3106, lr_0 = 2.9975e-04
Loss = 5.1297e-03, PNorm = 167.9103, GNorm = 0.0749, lr_0 = 2.9954e-04
Loss = 4.1157e-03, PNorm = 167.9227, GNorm = 0.0829, lr_0 = 2.9934e-04
Loss = 5.7480e-03, PNorm = 167.9331, GNorm = 0.0675, lr_0 = 2.9913e-04
Loss = 5.4729e-03, PNorm = 167.9441, GNorm = 0.1533, lr_0 = 2.9893e-04
Loss = 4.4967e-03, PNorm = 167.9550, GNorm = 0.3850, lr_0 = 2.9872e-04
Loss = 5.5790e-03, PNorm = 167.9686, GNorm = 0.2290, lr_0 = 2.9852e-04
Loss = 5.0418e-03, PNorm = 167.9793, GNorm = 0.3123, lr_0 = 2.9831e-04
Loss = 6.0375e-03, PNorm = 167.9917, GNorm = 0.2344, lr_0 = 2.9811e-04
Loss = 6.1111e-03, PNorm = 167.9999, GNorm = 0.3102, lr_0 = 2.9790e-04
Loss = 5.4179e-03, PNorm = 168.0144, GNorm = 0.4508, lr_0 = 2.9770e-04
Loss = 9.1242e-03, PNorm = 168.0309, GNorm = 0.2284, lr_0 = 2.9750e-04
Loss = 3.8020e-03, PNorm = 168.0454, GNorm = 0.1086, lr_0 = 2.9729e-04
Loss = 9.3272e-03, PNorm = 168.0564, GNorm = 0.1726, lr_0 = 2.9709e-04
Loss = 5.1056e-03, PNorm = 168.0703, GNorm = 0.1404, lr_0 = 2.9689e-04
Loss = 5.0092e-03, PNorm = 168.0836, GNorm = 0.2623, lr_0 = 2.9668e-04
Loss = 6.6098e-03, PNorm = 168.0959, GNorm = 0.3517, lr_0 = 2.9648e-04
Loss = 4.6811e-03, PNorm = 168.1061, GNorm = 0.0847, lr_0 = 2.9628e-04
Loss = 5.3613e-03, PNorm = 168.1153, GNorm = 0.0644, lr_0 = 2.9607e-04
Loss = 4.9700e-03, PNorm = 168.1240, GNorm = 0.4297, lr_0 = 2.9587e-04
Loss = 3.8011e-03, PNorm = 168.1331, GNorm = 0.1010, lr_0 = 2.9567e-04
Loss = 5.2200e-03, PNorm = 168.1443, GNorm = 0.1830, lr_0 = 2.9546e-04
Loss = 3.9406e-03, PNorm = 168.1549, GNorm = 0.0856, lr_0 = 2.9526e-04
Loss = 4.8359e-03, PNorm = 168.1663, GNorm = 0.3057, lr_0 = 2.9506e-04
Loss = 4.4633e-03, PNorm = 168.1747, GNorm = 0.1088, lr_0 = 2.9486e-04
Loss = 4.8508e-03, PNorm = 168.1855, GNorm = 0.0963, lr_0 = 2.9466e-04
Loss = 5.7935e-03, PNorm = 168.1948, GNorm = 0.4963, lr_0 = 2.9445e-04
Loss = 7.1041e-03, PNorm = 168.2040, GNorm = 0.2163, lr_0 = 2.9425e-04
Loss = 5.7324e-03, PNorm = 168.2150, GNorm = 0.1031, lr_0 = 2.9405e-04
Loss = 4.4992e-03, PNorm = 168.2252, GNorm = 0.2252, lr_0 = 2.9385e-04
Loss = 9.5223e-03, PNorm = 168.2363, GNorm = 0.3286, lr_0 = 2.9365e-04
Loss = 5.3567e-03, PNorm = 168.2477, GNorm = 0.0703, lr_0 = 2.9345e-04
Loss = 5.1561e-03, PNorm = 168.2615, GNorm = 0.1415, lr_0 = 2.9325e-04
Loss = 4.6890e-03, PNorm = 168.2712, GNorm = 0.1634, lr_0 = 2.9305e-04
Loss = 5.1965e-03, PNorm = 168.2827, GNorm = 0.2274, lr_0 = 2.9284e-04
Loss = 8.7900e-03, PNorm = 168.2948, GNorm = 0.0994, lr_0 = 2.9264e-04
Loss = 4.5060e-03, PNorm = 168.3056, GNorm = 0.3799, lr_0 = 2.9244e-04
Loss = 5.3293e-03, PNorm = 168.3161, GNorm = 0.0934, lr_0 = 2.9224e-04
Loss = 5.0540e-03, PNorm = 168.3276, GNorm = 0.2047, lr_0 = 2.9204e-04
Loss = 6.0365e-03, PNorm = 168.3376, GNorm = 0.3561, lr_0 = 2.9184e-04
Loss = 5.6497e-03, PNorm = 168.3533, GNorm = 0.2539, lr_0 = 2.9164e-04
Loss = 5.3975e-03, PNorm = 168.3660, GNorm = 0.4471, lr_0 = 2.9144e-04
Loss = 4.6054e-03, PNorm = 168.3775, GNorm = 0.1295, lr_0 = 2.9124e-04
Validation mae = 0.278125
Epoch 17
Loss = 4.2824e-03, PNorm = 168.3846, GNorm = 0.1129, lr_0 = 2.9104e-04
Loss = 4.5646e-03, PNorm = 168.3953, GNorm = 0.1396, lr_0 = 2.9084e-04
Loss = 4.3814e-03, PNorm = 168.4031, GNorm = 0.3259, lr_0 = 2.9065e-04
Loss = 4.9659e-03, PNorm = 168.4121, GNorm = 0.1700, lr_0 = 2.9045e-04
Loss = 5.1835e-03, PNorm = 168.4221, GNorm = 0.3448, lr_0 = 2.9025e-04
Loss = 4.8518e-03, PNorm = 168.4305, GNorm = 0.2720, lr_0 = 2.9005e-04
Loss = 4.8103e-03, PNorm = 168.4370, GNorm = 0.1592, lr_0 = 2.8985e-04
Loss = 6.4910e-03, PNorm = 168.4430, GNorm = 0.1670, lr_0 = 2.8965e-04
Loss = 3.8224e-03, PNorm = 168.4463, GNorm = 0.2979, lr_0 = 2.8945e-04
Loss = 7.5131e-03, PNorm = 168.4528, GNorm = 0.3266, lr_0 = 2.8925e-04
Loss = 4.0118e-03, PNorm = 168.4618, GNorm = 0.1771, lr_0 = 2.8906e-04
Loss = 4.2036e-03, PNorm = 168.4692, GNorm = 0.1707, lr_0 = 2.8886e-04
Loss = 4.6439e-03, PNorm = 168.4786, GNorm = 0.2032, lr_0 = 2.8866e-04
Loss = 3.7039e-03, PNorm = 168.4885, GNorm = 0.0938, lr_0 = 2.8846e-04
Loss = 3.9966e-03, PNorm = 168.4966, GNorm = 0.0551, lr_0 = 2.8826e-04
Loss = 5.1498e-03, PNorm = 168.5032, GNorm = 0.0531, lr_0 = 2.8807e-04
Loss = 4.7363e-03, PNorm = 168.5094, GNorm = 0.1468, lr_0 = 2.8787e-04
Loss = 3.6341e-03, PNorm = 168.5184, GNorm = 0.1786, lr_0 = 2.8767e-04
Loss = 6.0511e-03, PNorm = 168.5247, GNorm = 0.1197, lr_0 = 2.8748e-04
Loss = 3.7918e-03, PNorm = 168.5291, GNorm = 0.0537, lr_0 = 2.8728e-04
Loss = 5.0304e-03, PNorm = 168.5379, GNorm = 0.2167, lr_0 = 2.8708e-04
Loss = 5.6586e-03, PNorm = 168.5466, GNorm = 0.2280, lr_0 = 2.8689e-04
Loss = 4.0060e-03, PNorm = 168.5567, GNorm = 0.2598, lr_0 = 2.8669e-04
Loss = 3.4906e-03, PNorm = 168.5662, GNorm = 0.0688, lr_0 = 2.8649e-04
Loss = 4.3784e-03, PNorm = 168.5758, GNorm = 0.1156, lr_0 = 2.8630e-04
Loss = 4.2435e-03, PNorm = 168.5851, GNorm = 0.1260, lr_0 = 2.8610e-04
Loss = 4.9174e-03, PNorm = 168.5940, GNorm = 0.1194, lr_0 = 2.8590e-04
Loss = 5.3810e-03, PNorm = 168.6020, GNorm = 0.2710, lr_0 = 2.8571e-04
Loss = 6.8828e-03, PNorm = 168.6111, GNorm = 0.1134, lr_0 = 2.8551e-04
Loss = 4.1192e-03, PNorm = 168.6192, GNorm = 0.2042, lr_0 = 2.8532e-04
Loss = 4.3865e-03, PNorm = 168.6304, GNorm = 0.2178, lr_0 = 2.8512e-04
Loss = 4.1667e-03, PNorm = 168.6423, GNorm = 0.1169, lr_0 = 2.8493e-04
Loss = 3.7863e-03, PNorm = 168.6494, GNorm = 0.1646, lr_0 = 2.8473e-04
Loss = 3.5621e-03, PNorm = 168.6565, GNorm = 0.0667, lr_0 = 2.8454e-04
Loss = 4.1385e-03, PNorm = 168.6648, GNorm = 0.1052, lr_0 = 2.8434e-04
Loss = 3.9487e-03, PNorm = 168.6723, GNorm = 0.1891, lr_0 = 2.8415e-04
Loss = 5.4047e-03, PNorm = 168.6810, GNorm = 0.4995, lr_0 = 2.8395e-04
Loss = 5.3526e-03, PNorm = 168.6911, GNorm = 0.2188, lr_0 = 2.8376e-04
Loss = 3.9334e-03, PNorm = 168.7046, GNorm = 0.2957, lr_0 = 2.8356e-04
Loss = 4.1775e-03, PNorm = 168.7168, GNorm = 0.2795, lr_0 = 2.8337e-04
Loss = 5.8094e-03, PNorm = 168.7239, GNorm = 0.1532, lr_0 = 2.8317e-04
Loss = 4.0710e-03, PNorm = 168.7307, GNorm = 0.2465, lr_0 = 2.8298e-04
Loss = 3.9098e-03, PNorm = 168.7360, GNorm = 0.1544, lr_0 = 2.8279e-04
Loss = 4.1192e-03, PNorm = 168.7432, GNorm = 0.1507, lr_0 = 2.8259e-04
Loss = 4.0061e-03, PNorm = 168.7515, GNorm = 0.3698, lr_0 = 2.8240e-04
Loss = 3.8727e-03, PNorm = 168.7603, GNorm = 0.1408, lr_0 = 2.8221e-04
Loss = 5.6419e-03, PNorm = 168.7716, GNorm = 0.3416, lr_0 = 2.8201e-04
Loss = 4.2002e-03, PNorm = 168.7834, GNorm = 0.3337, lr_0 = 2.8182e-04
Loss = 4.2290e-03, PNorm = 168.7934, GNorm = 0.1800, lr_0 = 2.8163e-04
Loss = 6.6173e-03, PNorm = 168.8038, GNorm = 0.2016, lr_0 = 2.8143e-04
Loss = 3.1995e-03, PNorm = 168.8116, GNorm = 0.2793, lr_0 = 2.8124e-04
Loss = 4.5060e-03, PNorm = 168.8200, GNorm = 0.1291, lr_0 = 2.8105e-04
Loss = 4.4617e-03, PNorm = 168.8301, GNorm = 0.3080, lr_0 = 2.8085e-04
Loss = 4.6918e-03, PNorm = 168.8416, GNorm = 0.1259, lr_0 = 2.8066e-04
Loss = 4.5549e-03, PNorm = 168.8492, GNorm = 0.2039, lr_0 = 2.8047e-04
Loss = 4.3869e-03, PNorm = 168.8563, GNorm = 0.1102, lr_0 = 2.8028e-04
Loss = 4.8375e-03, PNorm = 168.8657, GNorm = 0.1068, lr_0 = 2.8009e-04
Loss = 4.3398e-03, PNorm = 168.8731, GNorm = 0.0886, lr_0 = 2.7989e-04
Loss = 3.7404e-03, PNorm = 168.8820, GNorm = 0.2216, lr_0 = 2.7970e-04
Loss = 5.2091e-03, PNorm = 168.8951, GNorm = 0.2391, lr_0 = 2.7951e-04
Loss = 4.5041e-03, PNorm = 168.9075, GNorm = 0.2170, lr_0 = 2.7932e-04
Loss = 5.2872e-03, PNorm = 168.9168, GNorm = 0.2153, lr_0 = 2.7913e-04
Loss = 4.9488e-03, PNorm = 168.9252, GNorm = 0.1998, lr_0 = 2.7894e-04
Loss = 3.7675e-03, PNorm = 168.9352, GNorm = 0.1134, lr_0 = 2.7875e-04
Loss = 5.2833e-03, PNorm = 168.9441, GNorm = 0.1484, lr_0 = 2.7855e-04
Loss = 5.5044e-03, PNorm = 168.9513, GNorm = 0.0829, lr_0 = 2.7836e-04
Loss = 3.5073e-03, PNorm = 168.9613, GNorm = 0.3645, lr_0 = 2.7817e-04
Loss = 4.9537e-03, PNorm = 168.9729, GNorm = 0.2321, lr_0 = 2.7798e-04
Loss = 4.1248e-03, PNorm = 168.9835, GNorm = 0.1422, lr_0 = 2.7779e-04
Loss = 3.9181e-03, PNorm = 168.9915, GNorm = 0.1671, lr_0 = 2.7760e-04
Loss = 3.3969e-03, PNorm = 168.9975, GNorm = 0.0924, lr_0 = 2.7741e-04
Loss = 4.3780e-03, PNorm = 169.0034, GNorm = 0.2004, lr_0 = 2.7722e-04
Loss = 3.9279e-03, PNorm = 169.0110, GNorm = 0.2046, lr_0 = 2.7703e-04
Loss = 4.6028e-03, PNorm = 169.0161, GNorm = 0.1637, lr_0 = 2.7684e-04
Loss = 3.9036e-03, PNorm = 169.0240, GNorm = 0.2349, lr_0 = 2.7665e-04
Loss = 5.3318e-03, PNorm = 169.0334, GNorm = 0.2691, lr_0 = 2.7646e-04
Loss = 6.5118e-03, PNorm = 169.0434, GNorm = 0.2042, lr_0 = 2.7627e-04
Loss = 4.6079e-03, PNorm = 169.0534, GNorm = 0.1924, lr_0 = 2.7608e-04
Loss = 4.1096e-03, PNorm = 169.0655, GNorm = 0.4916, lr_0 = 2.7590e-04
Loss = 6.1824e-03, PNorm = 169.0741, GNorm = 0.4261, lr_0 = 2.7571e-04
Loss = 5.0582e-03, PNorm = 169.0806, GNorm = 0.2834, lr_0 = 2.7552e-04
Loss = 5.1975e-03, PNorm = 169.0912, GNorm = 0.1319, lr_0 = 2.7533e-04
Loss = 4.5194e-03, PNorm = 169.1009, GNorm = 0.2257, lr_0 = 2.7514e-04
Loss = 4.4304e-03, PNorm = 169.1103, GNorm = 0.2030, lr_0 = 2.7495e-04
Loss = 5.5316e-03, PNorm = 169.1174, GNorm = 0.0628, lr_0 = 2.7476e-04
Loss = 4.1899e-03, PNorm = 169.1257, GNorm = 0.1763, lr_0 = 2.7457e-04
Loss = 5.1329e-03, PNorm = 169.1341, GNorm = 0.2141, lr_0 = 2.7439e-04
Loss = 3.9006e-03, PNorm = 169.1417, GNorm = 0.1911, lr_0 = 2.7420e-04
Loss = 7.9191e-03, PNorm = 169.1507, GNorm = 0.3084, lr_0 = 2.7401e-04
Loss = 5.6735e-03, PNorm = 169.1607, GNorm = 0.1047, lr_0 = 2.7382e-04
Loss = 4.5682e-03, PNorm = 169.1727, GNorm = 0.0748, lr_0 = 2.7364e-04
Loss = 4.0377e-03, PNorm = 169.1833, GNorm = 0.1549, lr_0 = 2.7345e-04
Loss = 7.2937e-03, PNorm = 169.1922, GNorm = 0.1920, lr_0 = 2.7326e-04
Loss = 3.3915e-03, PNorm = 169.2018, GNorm = 0.1148, lr_0 = 2.7307e-04
Loss = 4.1747e-03, PNorm = 169.2131, GNorm = 0.0552, lr_0 = 2.7289e-04
Loss = 4.2287e-03, PNorm = 169.2236, GNorm = 0.3057, lr_0 = 2.7270e-04
Loss = 4.6289e-03, PNorm = 169.2307, GNorm = 0.2476, lr_0 = 2.7251e-04
Loss = 4.0493e-03, PNorm = 169.2396, GNorm = 0.0862, lr_0 = 2.7233e-04
Loss = 4.5476e-03, PNorm = 169.2498, GNorm = 0.0757, lr_0 = 2.7214e-04
Loss = 4.0961e-03, PNorm = 169.2601, GNorm = 0.1371, lr_0 = 2.7195e-04
Loss = 3.2831e-03, PNorm = 169.2681, GNorm = 0.0989, lr_0 = 2.7177e-04
Loss = 3.9350e-03, PNorm = 169.2774, GNorm = 0.1112, lr_0 = 2.7158e-04
Loss = 6.0033e-03, PNorm = 169.2847, GNorm = 0.1431, lr_0 = 2.7139e-04
Loss = 5.2216e-03, PNorm = 169.2933, GNorm = 0.2714, lr_0 = 2.7121e-04
Loss = 7.2248e-03, PNorm = 169.3019, GNorm = 0.2827, lr_0 = 2.7102e-04
Loss = 4.6307e-03, PNorm = 169.3100, GNorm = 0.1644, lr_0 = 2.7084e-04
Loss = 4.4795e-03, PNorm = 169.3162, GNorm = 0.1252, lr_0 = 2.7065e-04
Loss = 3.8940e-03, PNorm = 169.3252, GNorm = 0.2035, lr_0 = 2.7047e-04
Loss = 3.8909e-03, PNorm = 169.3355, GNorm = 0.0757, lr_0 = 2.7028e-04
Loss = 4.1429e-03, PNorm = 169.3473, GNorm = 0.0968, lr_0 = 2.7010e-04
Loss = 7.0982e-03, PNorm = 169.3573, GNorm = 0.1243, lr_0 = 2.6991e-04
Loss = 3.5717e-03, PNorm = 169.3665, GNorm = 0.1616, lr_0 = 2.6973e-04
Loss = 3.5681e-03, PNorm = 169.3754, GNorm = 0.0738, lr_0 = 2.6954e-04
Loss = 4.5258e-03, PNorm = 169.3840, GNorm = 0.1245, lr_0 = 2.6936e-04
Loss = 4.7585e-03, PNorm = 169.3950, GNorm = 0.3333, lr_0 = 2.6917e-04
Loss = 4.4971e-03, PNorm = 169.4042, GNorm = 0.1243, lr_0 = 2.6899e-04
Loss = 4.2813e-03, PNorm = 169.4140, GNorm = 0.2511, lr_0 = 2.6880e-04
Loss = 3.0397e-03, PNorm = 169.4225, GNorm = 0.2112, lr_0 = 2.6862e-04
Loss = 3.4660e-03, PNorm = 169.4285, GNorm = 0.1366, lr_0 = 2.6844e-04
Loss = 3.6343e-03, PNorm = 169.4356, GNorm = 0.1908, lr_0 = 2.6825e-04
Validation mae = 0.278537
Epoch 18
Loss = 4.1922e-03, PNorm = 169.4423, GNorm = 0.2532, lr_0 = 2.6807e-04
Loss = 3.4115e-03, PNorm = 169.4495, GNorm = 0.2736, lr_0 = 2.6788e-04
Loss = 4.3067e-03, PNorm = 169.4533, GNorm = 0.2010, lr_0 = 2.6770e-04
Loss = 3.4054e-03, PNorm = 169.4581, GNorm = 0.2287, lr_0 = 2.6752e-04
Loss = 3.1835e-03, PNorm = 169.4627, GNorm = 0.0661, lr_0 = 2.6733e-04
Loss = 2.9674e-03, PNorm = 169.4704, GNorm = 0.2343, lr_0 = 2.6715e-04
Loss = 3.6545e-03, PNorm = 169.4773, GNorm = 0.3269, lr_0 = 2.6697e-04
Loss = 2.9629e-03, PNorm = 169.4857, GNorm = 0.1160, lr_0 = 2.6678e-04
Loss = 2.8139e-03, PNorm = 169.4911, GNorm = 0.0882, lr_0 = 2.6660e-04
Loss = 3.9667e-03, PNorm = 169.4970, GNorm = 0.1259, lr_0 = 2.6642e-04
Loss = 3.1396e-03, PNorm = 169.5020, GNorm = 0.0982, lr_0 = 2.6624e-04
Loss = 3.8825e-03, PNorm = 169.5065, GNorm = 0.2914, lr_0 = 2.6605e-04
Loss = 3.4266e-03, PNorm = 169.5097, GNorm = 0.1473, lr_0 = 2.6587e-04
Loss = 2.9317e-03, PNorm = 169.5157, GNorm = 0.1263, lr_0 = 2.6569e-04
Loss = 3.5700e-03, PNorm = 169.5222, GNorm = 0.5046, lr_0 = 2.6551e-04
Loss = 3.0051e-03, PNorm = 169.5295, GNorm = 0.1865, lr_0 = 2.6533e-04
Loss = 4.4137e-03, PNorm = 169.5375, GNorm = 0.2014, lr_0 = 2.6514e-04
Loss = 4.3179e-03, PNorm = 169.5449, GNorm = 0.3420, lr_0 = 2.6496e-04
Loss = 3.3617e-03, PNorm = 169.5520, GNorm = 0.1131, lr_0 = 2.6478e-04
Loss = 4.3054e-03, PNorm = 169.5600, GNorm = 0.3119, lr_0 = 2.6460e-04
Loss = 3.9559e-03, PNorm = 169.5666, GNorm = 0.1330, lr_0 = 2.6442e-04
Loss = 3.4358e-03, PNorm = 169.5710, GNorm = 0.1670, lr_0 = 2.6424e-04
Loss = 3.7910e-03, PNorm = 169.5757, GNorm = 0.0931, lr_0 = 2.6406e-04
Loss = 3.5970e-03, PNorm = 169.5833, GNorm = 0.2503, lr_0 = 2.6388e-04
Loss = 4.1145e-03, PNorm = 169.5915, GNorm = 0.0688, lr_0 = 2.6369e-04
Loss = 2.8659e-03, PNorm = 169.6001, GNorm = 0.0876, lr_0 = 2.6351e-04
Loss = 3.6617e-03, PNorm = 169.6051, GNorm = 0.0976, lr_0 = 2.6333e-04
Loss = 3.6272e-03, PNorm = 169.6141, GNorm = 0.4334, lr_0 = 2.6315e-04
Loss = 4.7943e-03, PNorm = 169.6253, GNorm = 0.1256, lr_0 = 2.6297e-04
Loss = 4.7349e-03, PNorm = 169.6345, GNorm = 0.1327, lr_0 = 2.6279e-04
Loss = 4.2088e-03, PNorm = 169.6400, GNorm = 0.0793, lr_0 = 2.6261e-04
Loss = 3.8628e-03, PNorm = 169.6445, GNorm = 0.1609, lr_0 = 2.6243e-04
Loss = 2.7900e-03, PNorm = 169.6516, GNorm = 0.0919, lr_0 = 2.6225e-04
Loss = 5.0325e-03, PNorm = 169.6592, GNorm = 0.0927, lr_0 = 2.6207e-04
Loss = 3.3075e-03, PNorm = 169.6652, GNorm = 0.1661, lr_0 = 2.6189e-04
Loss = 6.2949e-03, PNorm = 169.6716, GNorm = 0.1603, lr_0 = 2.6171e-04
Loss = 4.4384e-03, PNorm = 169.6773, GNorm = 0.0750, lr_0 = 2.6153e-04
Loss = 3.1695e-03, PNorm = 169.6879, GNorm = 0.1099, lr_0 = 2.6136e-04
Loss = 3.9148e-03, PNorm = 169.6952, GNorm = 0.2042, lr_0 = 2.6118e-04
Loss = 5.6340e-03, PNorm = 169.7016, GNorm = 0.1346, lr_0 = 2.6100e-04
Loss = 3.4014e-03, PNorm = 169.7056, GNorm = 0.1155, lr_0 = 2.6082e-04
Loss = 5.2163e-03, PNorm = 169.7105, GNorm = 0.2474, lr_0 = 2.6064e-04
Loss = 4.2642e-03, PNorm = 169.7144, GNorm = 0.1161, lr_0 = 2.6046e-04
Loss = 4.3495e-03, PNorm = 169.7226, GNorm = 0.1547, lr_0 = 2.6028e-04
Loss = 3.1079e-03, PNorm = 169.7349, GNorm = 0.3523, lr_0 = 2.6011e-04
Loss = 4.7645e-03, PNorm = 169.7450, GNorm = 0.3600, lr_0 = 2.5993e-04
Loss = 3.2822e-03, PNorm = 169.7531, GNorm = 0.1002, lr_0 = 2.5975e-04
Loss = 5.6104e-03, PNorm = 169.7612, GNorm = 0.2202, lr_0 = 2.5957e-04
Loss = 5.3703e-03, PNorm = 169.7657, GNorm = 0.1484, lr_0 = 2.5939e-04
Loss = 4.3732e-03, PNorm = 169.7736, GNorm = 0.1271, lr_0 = 2.5922e-04
Loss = 3.2962e-03, PNorm = 169.7803, GNorm = 0.2008, lr_0 = 2.5904e-04
Loss = 4.3908e-03, PNorm = 169.7907, GNorm = 0.1453, lr_0 = 2.5886e-04
Loss = 3.1082e-03, PNorm = 169.7981, GNorm = 0.0524, lr_0 = 2.5868e-04
Loss = 4.2228e-03, PNorm = 169.8053, GNorm = 0.3086, lr_0 = 2.5851e-04
Loss = 2.9838e-03, PNorm = 169.8137, GNorm = 0.1665, lr_0 = 2.5833e-04
Loss = 4.5588e-03, PNorm = 169.8208, GNorm = 0.1097, lr_0 = 2.5815e-04
Loss = 3.0068e-03, PNorm = 169.8276, GNorm = 0.1886, lr_0 = 2.5797e-04
Loss = 3.8910e-03, PNorm = 169.8338, GNorm = 0.2008, lr_0 = 2.5780e-04
Loss = 3.5683e-03, PNorm = 169.8410, GNorm = 0.3490, lr_0 = 2.5762e-04
Loss = 3.1729e-03, PNorm = 169.8460, GNorm = 0.0637, lr_0 = 2.5745e-04
Loss = 4.4737e-03, PNorm = 169.8522, GNorm = 0.1346, lr_0 = 2.5727e-04
Loss = 3.3543e-03, PNorm = 169.8606, GNorm = 0.1176, lr_0 = 2.5709e-04
Loss = 3.2824e-03, PNorm = 169.8688, GNorm = 0.0476, lr_0 = 2.5692e-04
Loss = 4.0409e-03, PNorm = 169.8751, GNorm = 0.1708, lr_0 = 2.5674e-04
Loss = 3.9518e-03, PNorm = 169.8816, GNorm = 0.1977, lr_0 = 2.5656e-04
Loss = 3.7744e-03, PNorm = 169.8885, GNorm = 0.2483, lr_0 = 2.5639e-04
Loss = 3.9719e-03, PNorm = 169.8976, GNorm = 0.1758, lr_0 = 2.5621e-04
Loss = 3.2560e-03, PNorm = 169.9044, GNorm = 0.2101, lr_0 = 2.5604e-04
Loss = 4.6341e-03, PNorm = 169.9145, GNorm = 0.0723, lr_0 = 2.5586e-04
Loss = 4.9685e-03, PNorm = 169.9249, GNorm = 0.0676, lr_0 = 2.5569e-04
Loss = 3.4532e-03, PNorm = 169.9343, GNorm = 0.1337, lr_0 = 2.5551e-04
Loss = 3.9623e-03, PNorm = 169.9447, GNorm = 0.2143, lr_0 = 2.5534e-04
Loss = 5.1788e-03, PNorm = 169.9518, GNorm = 0.1169, lr_0 = 2.5516e-04
Loss = 3.8592e-03, PNorm = 169.9588, GNorm = 0.7003, lr_0 = 2.5499e-04
Loss = 3.7830e-03, PNorm = 169.9629, GNorm = 0.4140, lr_0 = 2.5481e-04
Loss = 4.0336e-03, PNorm = 169.9700, GNorm = 0.3997, lr_0 = 2.5464e-04
Loss = 5.0242e-03, PNorm = 169.9775, GNorm = 0.2036, lr_0 = 2.5446e-04
Loss = 5.6418e-03, PNorm = 169.9838, GNorm = 0.0398, lr_0 = 2.5429e-04
Loss = 4.5696e-03, PNorm = 169.9909, GNorm = 0.2103, lr_0 = 2.5411e-04
Loss = 4.4433e-03, PNorm = 169.9989, GNorm = 0.1093, lr_0 = 2.5394e-04
Loss = 3.9668e-03, PNorm = 170.0085, GNorm = 0.1231, lr_0 = 2.5377e-04
Loss = 5.6045e-03, PNorm = 170.0171, GNorm = 0.0865, lr_0 = 2.5359e-04
Loss = 4.0661e-03, PNorm = 170.0251, GNorm = 0.1959, lr_0 = 2.5342e-04
Loss = 3.7731e-03, PNorm = 170.0374, GNorm = 0.2129, lr_0 = 2.5325e-04
Loss = 8.2567e-03, PNorm = 170.0445, GNorm = 0.1700, lr_0 = 2.5307e-04
Loss = 3.5159e-03, PNorm = 170.0505, GNorm = 0.1453, lr_0 = 2.5290e-04
Loss = 4.6124e-03, PNorm = 170.0574, GNorm = 0.3102, lr_0 = 2.5273e-04
Loss = 3.9815e-03, PNorm = 170.0651, GNorm = 0.2002, lr_0 = 2.5255e-04
Loss = 3.5949e-03, PNorm = 170.0749, GNorm = 0.1686, lr_0 = 2.5238e-04
Loss = 4.7145e-03, PNorm = 170.0853, GNorm = 0.1147, lr_0 = 2.5221e-04
Loss = 3.8819e-03, PNorm = 170.0973, GNorm = 0.0835, lr_0 = 2.5203e-04
Loss = 4.5035e-03, PNorm = 170.1074, GNorm = 0.1857, lr_0 = 2.5186e-04
Loss = 4.4067e-03, PNorm = 170.1151, GNorm = 0.1077, lr_0 = 2.5169e-04
Loss = 3.9666e-03, PNorm = 170.1232, GNorm = 0.0782, lr_0 = 2.5152e-04
Loss = 4.2459e-03, PNorm = 170.1313, GNorm = 0.0557, lr_0 = 2.5134e-04
Loss = 3.8453e-03, PNorm = 170.1419, GNorm = 0.2403, lr_0 = 2.5117e-04
Loss = 2.9157e-03, PNorm = 170.1520, GNorm = 0.1110, lr_0 = 2.5100e-04
Loss = 7.6575e-03, PNorm = 170.1576, GNorm = 0.1847, lr_0 = 2.5083e-04
Loss = 3.3905e-03, PNorm = 170.1615, GNorm = 0.0585, lr_0 = 2.5066e-04
Loss = 3.9052e-03, PNorm = 170.1703, GNorm = 0.2107, lr_0 = 2.5048e-04
Loss = 3.4216e-03, PNorm = 170.1771, GNorm = 0.1928, lr_0 = 2.5031e-04
Loss = 2.9806e-03, PNorm = 170.1867, GNorm = 0.1400, lr_0 = 2.5014e-04
Loss = 4.8229e-03, PNorm = 170.1981, GNorm = 0.5219, lr_0 = 2.4997e-04
Loss = 6.4107e-03, PNorm = 170.2064, GNorm = 0.3383, lr_0 = 2.4980e-04
Loss = 3.2366e-03, PNorm = 170.2153, GNorm = 0.1689, lr_0 = 2.4963e-04
Loss = 3.4221e-03, PNorm = 170.2212, GNorm = 0.0689, lr_0 = 2.4946e-04
Loss = 2.9316e-03, PNorm = 170.2269, GNorm = 0.1181, lr_0 = 2.4929e-04
Loss = 3.8692e-03, PNorm = 170.2353, GNorm = 0.0747, lr_0 = 2.4911e-04
Loss = 3.8268e-03, PNorm = 170.2471, GNorm = 0.1131, lr_0 = 2.4894e-04
Loss = 3.4054e-03, PNorm = 170.2553, GNorm = 0.2013, lr_0 = 2.4877e-04
Loss = 5.5038e-03, PNorm = 170.2623, GNorm = 0.0985, lr_0 = 2.4860e-04
Loss = 3.8140e-03, PNorm = 170.2687, GNorm = 0.0887, lr_0 = 2.4843e-04
Loss = 3.0406e-03, PNorm = 170.2741, GNorm = 0.0782, lr_0 = 2.4826e-04
Loss = 3.6292e-03, PNorm = 170.2829, GNorm = 0.1247, lr_0 = 2.4809e-04
Loss = 3.6490e-03, PNorm = 170.2929, GNorm = 0.1867, lr_0 = 2.4792e-04
Loss = 2.9799e-03, PNorm = 170.3012, GNorm = 0.2133, lr_0 = 2.4775e-04
Loss = 3.3410e-03, PNorm = 170.3074, GNorm = 0.0922, lr_0 = 2.4758e-04
Loss = 4.6928e-03, PNorm = 170.3128, GNorm = 0.4290, lr_0 = 2.4741e-04
Loss = 3.5028e-03, PNorm = 170.3169, GNorm = 0.1855, lr_0 = 2.4724e-04
Loss = 3.3456e-03, PNorm = 170.3231, GNorm = 0.1249, lr_0 = 2.4707e-04
Validation mae = 0.278291
Epoch 19
Loss = 4.0800e-03, PNorm = 170.3301, GNorm = 0.1887, lr_0 = 2.4690e-04
Loss = 3.2171e-03, PNorm = 170.3360, GNorm = 0.2125, lr_0 = 2.4674e-04
Loss = 3.6359e-03, PNorm = 170.3421, GNorm = 0.2640, lr_0 = 2.4657e-04
Loss = 2.7462e-03, PNorm = 170.3447, GNorm = 0.2063, lr_0 = 2.4640e-04
Loss = 3.4949e-03, PNorm = 170.3507, GNorm = 0.1977, lr_0 = 2.4623e-04
Loss = 3.4772e-03, PNorm = 170.3572, GNorm = 0.1231, lr_0 = 2.4606e-04
Loss = 2.8181e-03, PNorm = 170.3640, GNorm = 0.1208, lr_0 = 2.4589e-04
Loss = 3.3760e-03, PNorm = 170.3700, GNorm = 0.2267, lr_0 = 2.4572e-04
Loss = 2.7664e-03, PNorm = 170.3737, GNorm = 0.2454, lr_0 = 2.4556e-04
Loss = 3.2222e-03, PNorm = 170.3777, GNorm = 0.1252, lr_0 = 2.4539e-04
Loss = 3.2356e-03, PNorm = 170.3811, GNorm = 0.0763, lr_0 = 2.4522e-04
Loss = 2.7699e-03, PNorm = 170.3861, GNorm = 0.1731, lr_0 = 2.4505e-04
Loss = 3.0106e-03, PNorm = 170.3911, GNorm = 0.0575, lr_0 = 2.4488e-04
Loss = 3.4601e-03, PNorm = 170.3995, GNorm = 0.3970, lr_0 = 2.4472e-04
Loss = 3.3765e-03, PNorm = 170.4072, GNorm = 0.1171, lr_0 = 2.4455e-04
Loss = 2.7578e-03, PNorm = 170.4125, GNorm = 0.2664, lr_0 = 2.4438e-04
Loss = 3.6385e-03, PNorm = 170.4161, GNorm = 0.2355, lr_0 = 2.4421e-04
Loss = 3.2198e-03, PNorm = 170.4204, GNorm = 0.1146, lr_0 = 2.4405e-04
Loss = 3.2246e-03, PNorm = 170.4277, GNorm = 0.1851, lr_0 = 2.4388e-04
Loss = 3.4001e-03, PNorm = 170.4336, GNorm = 0.4233, lr_0 = 2.4371e-04
Loss = 3.1803e-03, PNorm = 170.4397, GNorm = 0.1622, lr_0 = 2.4354e-04
Loss = 3.7425e-03, PNorm = 170.4463, GNorm = 0.5615, lr_0 = 2.4338e-04
Loss = 4.1413e-03, PNorm = 170.4540, GNorm = 0.2769, lr_0 = 2.4321e-04
Loss = 3.6623e-03, PNorm = 170.4631, GNorm = 0.0997, lr_0 = 2.4304e-04
Loss = 3.4212e-03, PNorm = 170.4700, GNorm = 0.1030, lr_0 = 2.4288e-04
Loss = 2.4353e-03, PNorm = 170.4773, GNorm = 0.0825, lr_0 = 2.4271e-04
Loss = 3.4998e-03, PNorm = 170.4829, GNorm = 0.3874, lr_0 = 2.4254e-04
Loss = 3.3058e-03, PNorm = 170.4894, GNorm = 0.2337, lr_0 = 2.4238e-04
Loss = 2.5902e-03, PNorm = 170.4937, GNorm = 0.1510, lr_0 = 2.4221e-04
Loss = 3.7530e-03, PNorm = 170.5004, GNorm = 0.2400, lr_0 = 2.4205e-04
Loss = 2.8977e-03, PNorm = 170.5061, GNorm = 0.1638, lr_0 = 2.4188e-04
Loss = 3.4915e-03, PNorm = 170.5115, GNorm = 0.3110, lr_0 = 2.4171e-04
Loss = 3.8245e-03, PNorm = 170.5174, GNorm = 0.0862, lr_0 = 2.4155e-04
Loss = 4.6803e-03, PNorm = 170.5227, GNorm = 0.1861, lr_0 = 2.4138e-04
Loss = 2.9986e-03, PNorm = 170.5273, GNorm = 0.1598, lr_0 = 2.4122e-04
Loss = 2.7283e-03, PNorm = 170.5339, GNorm = 0.2667, lr_0 = 2.4105e-04
Loss = 2.2856e-03, PNorm = 170.5394, GNorm = 0.1832, lr_0 = 2.4089e-04
Loss = 3.2309e-03, PNorm = 170.5432, GNorm = 0.1889, lr_0 = 2.4072e-04
Loss = 2.7269e-03, PNorm = 170.5492, GNorm = 0.1541, lr_0 = 2.4056e-04
Loss = 4.0234e-03, PNorm = 170.5572, GNorm = 0.2396, lr_0 = 2.4039e-04
Loss = 2.7866e-03, PNorm = 170.5652, GNorm = 0.2024, lr_0 = 2.4023e-04
Loss = 4.5302e-03, PNorm = 170.5719, GNorm = 0.2458, lr_0 = 2.4006e-04
Loss = 2.8243e-03, PNorm = 170.5824, GNorm = 0.1319, lr_0 = 2.3990e-04
Loss = 4.2637e-03, PNorm = 170.5891, GNorm = 0.2394, lr_0 = 2.3974e-04
Loss = 4.8042e-03, PNorm = 170.5959, GNorm = 0.1867, lr_0 = 2.3957e-04
Loss = 3.2065e-03, PNorm = 170.6011, GNorm = 0.1066, lr_0 = 2.3941e-04
Loss = 2.8566e-03, PNorm = 170.6077, GNorm = 0.3196, lr_0 = 2.3924e-04
Loss = 4.1516e-03, PNorm = 170.6161, GNorm = 0.0646, lr_0 = 2.3908e-04
Loss = 2.8430e-03, PNorm = 170.6215, GNorm = 0.2475, lr_0 = 2.3892e-04
Loss = 3.2560e-03, PNorm = 170.6293, GNorm = 0.1079, lr_0 = 2.3875e-04
Loss = 2.7596e-03, PNorm = 170.6349, GNorm = 0.1351, lr_0 = 2.3859e-04
Loss = 3.2592e-03, PNorm = 170.6402, GNorm = 0.0569, lr_0 = 2.3842e-04
Loss = 5.4825e-03, PNorm = 170.6459, GNorm = 0.1534, lr_0 = 2.3826e-04
Loss = 3.1090e-03, PNorm = 170.6517, GNorm = 0.3605, lr_0 = 2.3810e-04
Loss = 3.4737e-03, PNorm = 170.6589, GNorm = 0.3535, lr_0 = 2.3794e-04
Loss = 3.9782e-03, PNorm = 170.6663, GNorm = 0.1818, lr_0 = 2.3777e-04
Loss = 4.6417e-03, PNorm = 170.6720, GNorm = 0.2517, lr_0 = 2.3761e-04
Loss = 5.1627e-03, PNorm = 170.6779, GNorm = 0.1443, lr_0 = 2.3745e-04
Loss = 3.0626e-03, PNorm = 170.6841, GNorm = 0.1208, lr_0 = 2.3728e-04
Loss = 3.2665e-03, PNorm = 170.6910, GNorm = 0.2604, lr_0 = 2.3712e-04
Loss = 2.9868e-03, PNorm = 170.6988, GNorm = 0.4009, lr_0 = 2.3696e-04
Loss = 4.0397e-03, PNorm = 170.7057, GNorm = 0.1197, lr_0 = 2.3680e-04
Loss = 3.1785e-03, PNorm = 170.7122, GNorm = 0.3094, lr_0 = 2.3663e-04
Loss = 3.9822e-03, PNorm = 170.7192, GNorm = 0.1443, lr_0 = 2.3647e-04
Loss = 2.5434e-03, PNorm = 170.7266, GNorm = 0.2488, lr_0 = 2.3631e-04
Loss = 3.3684e-03, PNorm = 170.7351, GNorm = 0.1157, lr_0 = 2.3615e-04
Loss = 2.8291e-03, PNorm = 170.7418, GNorm = 0.0901, lr_0 = 2.3599e-04
Loss = 2.8299e-03, PNorm = 170.7503, GNorm = 0.1124, lr_0 = 2.3582e-04
Loss = 3.1545e-03, PNorm = 170.7603, GNorm = 0.1033, lr_0 = 2.3566e-04
Loss = 2.9993e-03, PNorm = 170.7659, GNorm = 0.0734, lr_0 = 2.3550e-04
Loss = 3.8684e-03, PNorm = 170.7710, GNorm = 0.1175, lr_0 = 2.3534e-04
Loss = 3.9850e-03, PNorm = 170.7754, GNorm = 0.0713, lr_0 = 2.3518e-04
Loss = 3.5081e-03, PNorm = 170.7815, GNorm = 0.1680, lr_0 = 2.3502e-04
Loss = 2.6555e-03, PNorm = 170.7859, GNorm = 0.0765, lr_0 = 2.3486e-04
Loss = 4.1344e-03, PNorm = 170.7950, GNorm = 0.2210, lr_0 = 2.3470e-04
Loss = 4.2091e-03, PNorm = 170.8027, GNorm = 0.2920, lr_0 = 2.3454e-04
Loss = 4.3659e-03, PNorm = 170.8078, GNorm = 0.0718, lr_0 = 2.3437e-04
Loss = 3.5369e-03, PNorm = 170.8120, GNorm = 0.1610, lr_0 = 2.3421e-04
Loss = 3.9973e-03, PNorm = 170.8168, GNorm = 0.2265, lr_0 = 2.3405e-04
Loss = 3.9504e-03, PNorm = 170.8234, GNorm = 0.2459, lr_0 = 2.3389e-04
Loss = 2.7329e-03, PNorm = 170.8323, GNorm = 0.3760, lr_0 = 2.3373e-04
Loss = 2.8285e-03, PNorm = 170.8379, GNorm = 0.0993, lr_0 = 2.3357e-04
Loss = 3.3696e-03, PNorm = 170.8445, GNorm = 0.1487, lr_0 = 2.3341e-04
Loss = 4.2633e-03, PNorm = 170.8538, GNorm = 0.1952, lr_0 = 2.3325e-04
Loss = 6.3078e-03, PNorm = 170.8562, GNorm = 0.2300, lr_0 = 2.3309e-04
Loss = 3.0085e-03, PNorm = 170.8596, GNorm = 0.1277, lr_0 = 2.3293e-04
Loss = 4.9877e-03, PNorm = 170.8659, GNorm = 0.1321, lr_0 = 2.3277e-04
Loss = 5.1036e-03, PNorm = 170.8738, GNorm = 0.3108, lr_0 = 2.3261e-04
Loss = 2.5052e-03, PNorm = 170.8839, GNorm = 0.0819, lr_0 = 2.3246e-04
Loss = 3.5099e-03, PNorm = 170.8901, GNorm = 0.0500, lr_0 = 2.3230e-04
Loss = 3.5856e-03, PNorm = 170.8924, GNorm = 0.1927, lr_0 = 2.3214e-04
Loss = 6.6858e-03, PNorm = 170.8993, GNorm = 0.2405, lr_0 = 2.3198e-04
Loss = 3.9751e-03, PNorm = 170.9091, GNorm = 0.2067, lr_0 = 2.3182e-04
Loss = 2.7477e-03, PNorm = 170.9187, GNorm = 0.1420, lr_0 = 2.3166e-04
Loss = 4.2502e-03, PNorm = 170.9277, GNorm = 0.1534, lr_0 = 2.3150e-04
Loss = 2.3803e-03, PNorm = 170.9341, GNorm = 0.1761, lr_0 = 2.3134e-04
Loss = 5.3416e-03, PNorm = 170.9402, GNorm = 0.1498, lr_0 = 2.3118e-04
Loss = 2.4319e-03, PNorm = 170.9468, GNorm = 0.0869, lr_0 = 2.3103e-04
Loss = 2.7910e-03, PNorm = 170.9537, GNorm = 0.0882, lr_0 = 2.3087e-04
Loss = 3.4628e-03, PNorm = 170.9601, GNorm = 0.2319, lr_0 = 2.3071e-04
Loss = 3.9102e-03, PNorm = 170.9697, GNorm = 0.0823, lr_0 = 2.3055e-04
Loss = 4.2174e-03, PNorm = 170.9788, GNorm = 0.0664, lr_0 = 2.3039e-04
Loss = 3.7741e-03, PNorm = 170.9874, GNorm = 0.1503, lr_0 = 2.3024e-04
Loss = 2.5371e-03, PNorm = 170.9936, GNorm = 0.0877, lr_0 = 2.3008e-04
Loss = 4.3931e-03, PNorm = 170.9961, GNorm = 0.2162, lr_0 = 2.2992e-04
Loss = 2.4386e-03, PNorm = 171.0036, GNorm = 0.1651, lr_0 = 2.2976e-04
Loss = 6.7022e-03, PNorm = 171.0104, GNorm = 0.0764, lr_0 = 2.2961e-04
Loss = 2.3395e-03, PNorm = 171.0165, GNorm = 0.0997, lr_0 = 2.2945e-04
Loss = 3.4801e-03, PNorm = 171.0220, GNorm = 0.0846, lr_0 = 2.2929e-04
Loss = 4.8683e-03, PNorm = 171.0251, GNorm = 0.3316, lr_0 = 2.2913e-04
Loss = 4.8729e-03, PNorm = 171.0287, GNorm = 0.0619, lr_0 = 2.2898e-04
Loss = 3.2498e-03, PNorm = 171.0324, GNorm = 0.0818, lr_0 = 2.2882e-04
Loss = 3.1501e-03, PNorm = 171.0384, GNorm = 0.1123, lr_0 = 2.2866e-04
Loss = 5.1493e-03, PNorm = 171.0459, GNorm = 0.3097, lr_0 = 2.2851e-04
Loss = 3.7304e-03, PNorm = 171.0539, GNorm = 0.1603, lr_0 = 2.2835e-04
Loss = 3.1582e-03, PNorm = 171.0598, GNorm = 0.0654, lr_0 = 2.2819e-04
Loss = 2.1742e-03, PNorm = 171.0679, GNorm = 0.0634, lr_0 = 2.2804e-04
Loss = 3.5010e-03, PNorm = 171.0752, GNorm = 0.0800, lr_0 = 2.2788e-04
Loss = 3.3713e-03, PNorm = 171.0813, GNorm = 0.1218, lr_0 = 2.2773e-04
Loss = 2.6763e-03, PNorm = 171.0868, GNorm = 0.2148, lr_0 = 2.2757e-04
Validation mae = 0.278406
Epoch 20
Loss = 2.2240e-03, PNorm = 171.0918, GNorm = 0.0812, lr_0 = 2.2741e-04
Loss = 4.0233e-03, PNorm = 171.0953, GNorm = 0.0814, lr_0 = 2.2726e-04
Loss = 3.6390e-03, PNorm = 171.1027, GNorm = 0.0784, lr_0 = 2.2710e-04
Loss = 4.0543e-03, PNorm = 171.1073, GNorm = 0.1435, lr_0 = 2.2695e-04
Loss = 3.5901e-03, PNorm = 171.1108, GNorm = 0.2032, lr_0 = 2.2679e-04
Loss = 2.4429e-03, PNorm = 171.1152, GNorm = 0.0814, lr_0 = 2.2664e-04
Loss = 2.4793e-03, PNorm = 171.1166, GNorm = 0.1874, lr_0 = 2.2648e-04
Loss = 2.8854e-03, PNorm = 171.1203, GNorm = 0.1754, lr_0 = 2.2632e-04
Loss = 2.2775e-03, PNorm = 171.1272, GNorm = 0.0983, lr_0 = 2.2617e-04
Loss = 2.9035e-03, PNorm = 171.1350, GNorm = 0.3062, lr_0 = 2.2601e-04
Loss = 2.8488e-03, PNorm = 171.1409, GNorm = 0.0894, lr_0 = 2.2586e-04
Loss = 3.1564e-03, PNorm = 171.1451, GNorm = 0.0425, lr_0 = 2.2571e-04
Loss = 2.1923e-03, PNorm = 171.1471, GNorm = 0.1848, lr_0 = 2.2555e-04
Loss = 3.6347e-03, PNorm = 171.1513, GNorm = 0.2961, lr_0 = 2.2540e-04
Loss = 4.0737e-03, PNorm = 171.1582, GNorm = 0.1530, lr_0 = 2.2524e-04
Loss = 3.4017e-03, PNorm = 171.1628, GNorm = 0.1092, lr_0 = 2.2509e-04
Loss = 3.5180e-03, PNorm = 171.1699, GNorm = 0.2916, lr_0 = 2.2493e-04
Loss = 4.6540e-03, PNorm = 171.1753, GNorm = 0.1674, lr_0 = 2.2478e-04
Loss = 2.1978e-03, PNorm = 171.1803, GNorm = 0.0802, lr_0 = 2.2463e-04
Loss = 2.8243e-03, PNorm = 171.1852, GNorm = 0.0703, lr_0 = 2.2447e-04
Loss = 3.1545e-03, PNorm = 171.1907, GNorm = 0.1298, lr_0 = 2.2432e-04
Loss = 2.1756e-03, PNorm = 171.1953, GNorm = 0.1786, lr_0 = 2.2416e-04
Loss = 2.4555e-03, PNorm = 171.2012, GNorm = 0.1594, lr_0 = 2.2401e-04
Loss = 2.7214e-03, PNorm = 171.2060, GNorm = 0.0823, lr_0 = 2.2386e-04
Loss = 3.1785e-03, PNorm = 171.2102, GNorm = 0.1548, lr_0 = 2.2370e-04
Loss = 1.9927e-03, PNorm = 171.2144, GNorm = 0.1600, lr_0 = 2.2355e-04
Loss = 2.7367e-03, PNorm = 171.2194, GNorm = 0.1938, lr_0 = 2.2340e-04
Loss = 4.7613e-03, PNorm = 171.2246, GNorm = 0.1163, lr_0 = 2.2324e-04
Loss = 3.4582e-03, PNorm = 171.2278, GNorm = 0.1402, lr_0 = 2.2309e-04
Loss = 3.5965e-03, PNorm = 171.2307, GNorm = 0.0910, lr_0 = 2.2294e-04
Loss = 2.7109e-03, PNorm = 171.2358, GNorm = 0.1018, lr_0 = 2.2279e-04
Loss = 2.7129e-03, PNorm = 171.2418, GNorm = 0.0933, lr_0 = 2.2263e-04
Loss = 2.2536e-03, PNorm = 171.2454, GNorm = 0.3130, lr_0 = 2.2248e-04
Loss = 2.9609e-03, PNorm = 171.2509, GNorm = 0.1506, lr_0 = 2.2233e-04
Loss = 4.6030e-03, PNorm = 171.2562, GNorm = 0.1763, lr_0 = 2.2218e-04
Loss = 2.1904e-03, PNorm = 171.2623, GNorm = 0.1143, lr_0 = 2.2202e-04
Loss = 2.5911e-03, PNorm = 171.2667, GNorm = 0.0913, lr_0 = 2.2187e-04
Loss = 2.6043e-03, PNorm = 171.2718, GNorm = 0.0698, lr_0 = 2.2172e-04
Loss = 3.0478e-03, PNorm = 171.2761, GNorm = 0.0866, lr_0 = 2.2157e-04
Loss = 2.4265e-03, PNorm = 171.2802, GNorm = 0.0548, lr_0 = 2.2142e-04
Loss = 2.9831e-03, PNorm = 171.2837, GNorm = 0.2139, lr_0 = 2.2126e-04
Loss = 2.7135e-03, PNorm = 171.2878, GNorm = 0.0709, lr_0 = 2.2111e-04
Loss = 2.9748e-03, PNorm = 171.2916, GNorm = 0.3073, lr_0 = 2.2096e-04
Loss = 3.7682e-03, PNorm = 171.2986, GNorm = 0.0840, lr_0 = 2.2081e-04
Loss = 2.5117e-03, PNorm = 171.3054, GNorm = 0.1230, lr_0 = 2.2066e-04
Loss = 3.8072e-03, PNorm = 171.3119, GNorm = 0.3573, lr_0 = 2.2051e-04
Loss = 2.6754e-03, PNorm = 171.3172, GNorm = 0.2323, lr_0 = 2.2036e-04
Loss = 2.6440e-03, PNorm = 171.3213, GNorm = 0.3690, lr_0 = 2.2021e-04
Loss = 2.7339e-03, PNorm = 171.3256, GNorm = 0.1234, lr_0 = 2.2005e-04
Loss = 3.9735e-03, PNorm = 171.3297, GNorm = 0.2487, lr_0 = 2.1990e-04
Loss = 4.1807e-03, PNorm = 171.3330, GNorm = 0.2557, lr_0 = 2.1975e-04
Loss = 2.4058e-03, PNorm = 171.3378, GNorm = 0.1429, lr_0 = 2.1960e-04
Loss = 3.8130e-03, PNorm = 171.3433, GNorm = 0.2340, lr_0 = 2.1945e-04
Loss = 2.8708e-03, PNorm = 171.3483, GNorm = 0.1725, lr_0 = 2.1930e-04
Loss = 3.2865e-03, PNorm = 171.3552, GNorm = 0.6194, lr_0 = 2.1915e-04
Loss = 2.3595e-03, PNorm = 171.3615, GNorm = 0.2168, lr_0 = 2.1900e-04
Loss = 2.8421e-03, PNorm = 171.3673, GNorm = 0.2238, lr_0 = 2.1885e-04
Loss = 3.3256e-03, PNorm = 171.3743, GNorm = 0.1399, lr_0 = 2.1870e-04
Loss = 2.2995e-03, PNorm = 171.3794, GNorm = 0.1330, lr_0 = 2.1855e-04
Loss = 2.4993e-03, PNorm = 171.3846, GNorm = 0.0962, lr_0 = 2.1840e-04
Loss = 2.4142e-03, PNorm = 171.3887, GNorm = 0.1361, lr_0 = 2.1825e-04
Loss = 2.3267e-03, PNorm = 171.3942, GNorm = 0.1442, lr_0 = 2.1810e-04
Loss = 3.9609e-03, PNorm = 171.3994, GNorm = 0.1505, lr_0 = 2.1795e-04
Loss = 3.0094e-03, PNorm = 171.4055, GNorm = 0.1962, lr_0 = 2.1780e-04
Loss = 2.8514e-03, PNorm = 171.4118, GNorm = 0.2347, lr_0 = 2.1765e-04
Loss = 2.3769e-03, PNorm = 171.4199, GNorm = 0.1147, lr_0 = 2.1751e-04
Loss = 2.7064e-03, PNorm = 171.4259, GNorm = 0.1946, lr_0 = 2.1736e-04
Loss = 2.2862e-03, PNorm = 171.4297, GNorm = 0.1068, lr_0 = 2.1721e-04
Loss = 3.5387e-03, PNorm = 171.4323, GNorm = 0.1291, lr_0 = 2.1706e-04
Loss = 4.1235e-03, PNorm = 171.4360, GNorm = 0.0651, lr_0 = 2.1691e-04
Loss = 2.2053e-03, PNorm = 171.4398, GNorm = 0.2582, lr_0 = 2.1676e-04
Loss = 2.4262e-03, PNorm = 171.4430, GNorm = 0.0763, lr_0 = 2.1661e-04
Loss = 1.9006e-03, PNorm = 171.4490, GNorm = 0.1102, lr_0 = 2.1646e-04
Loss = 4.6285e-03, PNorm = 171.4550, GNorm = 0.1018, lr_0 = 2.1632e-04
Loss = 3.1423e-03, PNorm = 171.4610, GNorm = 0.2607, lr_0 = 2.1617e-04
Loss = 2.7420e-03, PNorm = 171.4658, GNorm = 0.1996, lr_0 = 2.1602e-04
Loss = 4.9843e-03, PNorm = 171.4711, GNorm = 0.0719, lr_0 = 2.1587e-04
Loss = 2.7654e-03, PNorm = 171.4761, GNorm = 0.0936, lr_0 = 2.1572e-04
Loss = 2.9632e-03, PNorm = 171.4836, GNorm = 0.1328, lr_0 = 2.1558e-04
Loss = 3.7362e-03, PNorm = 171.4911, GNorm = 0.0496, lr_0 = 2.1543e-04
Loss = 4.7499e-03, PNorm = 171.4953, GNorm = 0.2047, lr_0 = 2.1528e-04
Loss = 2.7391e-03, PNorm = 171.5007, GNorm = 0.2639, lr_0 = 2.1513e-04
Loss = 2.0504e-03, PNorm = 171.5066, GNorm = 0.1571, lr_0 = 2.1499e-04
Loss = 2.6166e-03, PNorm = 171.5109, GNorm = 0.1429, lr_0 = 2.1484e-04
Loss = 2.4626e-03, PNorm = 171.5147, GNorm = 0.1454, lr_0 = 2.1469e-04
Loss = 2.7860e-03, PNorm = 171.5189, GNorm = 0.0464, lr_0 = 2.1454e-04
Loss = 2.6238e-03, PNorm = 171.5234, GNorm = 0.1173, lr_0 = 2.1440e-04
Loss = 2.0385e-03, PNorm = 171.5297, GNorm = 0.1221, lr_0 = 2.1425e-04
Loss = 5.0899e-03, PNorm = 171.5356, GNorm = 0.3907, lr_0 = 2.1410e-04
Loss = 3.7865e-03, PNorm = 171.5410, GNorm = 0.7079, lr_0 = 2.1396e-04
Loss = 2.2729e-03, PNorm = 171.5462, GNorm = 0.0879, lr_0 = 2.1381e-04
Loss = 3.0844e-03, PNorm = 171.5506, GNorm = 0.1620, lr_0 = 2.1366e-04
Loss = 2.7025e-03, PNorm = 171.5568, GNorm = 0.1508, lr_0 = 2.1352e-04
Loss = 4.4502e-03, PNorm = 171.5654, GNorm = 0.0961, lr_0 = 2.1337e-04
Loss = 2.7526e-03, PNorm = 171.5723, GNorm = 0.1510, lr_0 = 2.1323e-04
Loss = 2.6321e-03, PNorm = 171.5774, GNorm = 0.0593, lr_0 = 2.1308e-04
Loss = 3.0879e-03, PNorm = 171.5825, GNorm = 0.0562, lr_0 = 2.1293e-04
Loss = 2.8385e-03, PNorm = 171.5867, GNorm = 0.0834, lr_0 = 2.1279e-04
Loss = 5.8550e-03, PNorm = 171.5917, GNorm = 0.1143, lr_0 = 2.1264e-04
Loss = 3.0450e-03, PNorm = 171.5970, GNorm = 0.1869, lr_0 = 2.1250e-04
Loss = 2.8266e-03, PNorm = 171.6020, GNorm = 0.1941, lr_0 = 2.1235e-04
Loss = 5.3111e-03, PNorm = 171.6065, GNorm = 0.1086, lr_0 = 2.1221e-04
Loss = 2.9361e-03, PNorm = 171.6116, GNorm = 0.2411, lr_0 = 2.1206e-04
Loss = 2.7084e-03, PNorm = 171.6140, GNorm = 0.0950, lr_0 = 2.1191e-04
Loss = 2.3843e-03, PNorm = 171.6187, GNorm = 0.0977, lr_0 = 2.1177e-04
Loss = 2.2320e-03, PNorm = 171.6235, GNorm = 0.3172, lr_0 = 2.1162e-04
Loss = 3.5769e-03, PNorm = 171.6304, GNorm = 0.0874, lr_0 = 2.1148e-04
Loss = 2.8065e-03, PNorm = 171.6376, GNorm = 0.2594, lr_0 = 2.1133e-04
Loss = 4.0707e-03, PNorm = 171.6429, GNorm = 0.1958, lr_0 = 2.1119e-04
Loss = 2.3719e-03, PNorm = 171.6494, GNorm = 0.0996, lr_0 = 2.1104e-04
Loss = 2.8431e-03, PNorm = 171.6537, GNorm = 0.1729, lr_0 = 2.1090e-04
Loss = 3.8270e-03, PNorm = 171.6580, GNorm = 0.0874, lr_0 = 2.1076e-04
Loss = 2.7712e-03, PNorm = 171.6634, GNorm = 0.1026, lr_0 = 2.1061e-04
Loss = 5.6239e-03, PNorm = 171.6679, GNorm = 0.0675, lr_0 = 2.1047e-04
Loss = 4.6177e-03, PNorm = 171.6720, GNorm = 0.3422, lr_0 = 2.1032e-04
Loss = 2.8482e-03, PNorm = 171.6791, GNorm = 0.0700, lr_0 = 2.1018e-04
Loss = 2.3114e-03, PNorm = 171.6864, GNorm = 0.0409, lr_0 = 2.1003e-04
Loss = 3.0217e-03, PNorm = 171.6932, GNorm = 0.2476, lr_0 = 2.0989e-04
Loss = 2.6052e-03, PNorm = 171.6973, GNorm = 0.1000, lr_0 = 2.0975e-04
Loss = 2.8225e-03, PNorm = 171.7029, GNorm = 0.1644, lr_0 = 2.0960e-04
Validation mae = 0.277860
Epoch 21
Loss = 3.2037e-03, PNorm = 171.7048, GNorm = 0.3267, lr_0 = 2.0946e-04
Loss = 2.9214e-03, PNorm = 171.7073, GNorm = 0.1958, lr_0 = 2.0932e-04
Loss = 3.0347e-03, PNorm = 171.7104, GNorm = 0.1938, lr_0 = 2.0917e-04
Loss = 1.8745e-03, PNorm = 171.7126, GNorm = 0.0744, lr_0 = 2.0903e-04
Loss = 3.1294e-03, PNorm = 171.7158, GNorm = 0.0713, lr_0 = 2.0889e-04
Loss = 2.8525e-03, PNorm = 171.7212, GNorm = 0.1047, lr_0 = 2.0874e-04
Loss = 1.9893e-03, PNorm = 171.7251, GNorm = 0.1627, lr_0 = 2.0860e-04
Loss = 2.9097e-03, PNorm = 171.7280, GNorm = 0.2497, lr_0 = 2.0846e-04
Loss = 2.3860e-03, PNorm = 171.7311, GNorm = 0.2392, lr_0 = 2.0831e-04
Loss = 2.1225e-03, PNorm = 171.7340, GNorm = 0.0766, lr_0 = 2.0817e-04
Loss = 3.0062e-03, PNorm = 171.7399, GNorm = 0.3030, lr_0 = 2.0803e-04
Loss = 2.4362e-03, PNorm = 171.7453, GNorm = 0.1003, lr_0 = 2.0789e-04
Loss = 2.1793e-03, PNorm = 171.7519, GNorm = 0.1211, lr_0 = 2.0774e-04
Loss = 2.3933e-03, PNorm = 171.7562, GNorm = 0.0595, lr_0 = 2.0760e-04
Loss = 2.5245e-03, PNorm = 171.7598, GNorm = 0.1305, lr_0 = 2.0746e-04
Loss = 2.8200e-03, PNorm = 171.7633, GNorm = 0.2871, lr_0 = 2.0732e-04
Loss = 2.3386e-03, PNorm = 171.7692, GNorm = 0.0822, lr_0 = 2.0718e-04
Loss = 3.0247e-03, PNorm = 171.7752, GNorm = 0.1757, lr_0 = 2.0703e-04
Loss = 3.1169e-03, PNorm = 171.7783, GNorm = 0.1162, lr_0 = 2.0689e-04
Loss = 2.1130e-03, PNorm = 171.7822, GNorm = 0.1552, lr_0 = 2.0675e-04
Loss = 2.6476e-03, PNorm = 171.7859, GNorm = 0.2698, lr_0 = 2.0661e-04
Loss = 2.9227e-03, PNorm = 171.7904, GNorm = 0.0874, lr_0 = 2.0647e-04
Loss = 1.7481e-03, PNorm = 171.7944, GNorm = 0.1179, lr_0 = 2.0633e-04
Loss = 2.4868e-03, PNorm = 171.7998, GNorm = 0.2434, lr_0 = 2.0618e-04
Loss = 2.9107e-03, PNorm = 171.8018, GNorm = 0.1383, lr_0 = 2.0604e-04
Loss = 4.1103e-03, PNorm = 171.8052, GNorm = 0.1029, lr_0 = 2.0590e-04
Loss = 2.1717e-03, PNorm = 171.8064, GNorm = 0.2280, lr_0 = 2.0576e-04
Loss = 2.3500e-03, PNorm = 171.8086, GNorm = 0.2553, lr_0 = 2.0562e-04
Loss = 4.1537e-03, PNorm = 171.8096, GNorm = 0.0613, lr_0 = 2.0548e-04
Loss = 2.0961e-03, PNorm = 171.8127, GNorm = 0.2795, lr_0 = 2.0534e-04
Loss = 2.1666e-03, PNorm = 171.8168, GNorm = 0.2002, lr_0 = 2.0520e-04
Loss = 1.8005e-03, PNorm = 171.8213, GNorm = 0.0588, lr_0 = 2.0506e-04
Loss = 2.4518e-03, PNorm = 171.8245, GNorm = 0.0709, lr_0 = 2.0492e-04
Loss = 1.6544e-03, PNorm = 171.8315, GNorm = 0.1115, lr_0 = 2.0478e-04
Loss = 2.1477e-03, PNorm = 171.8348, GNorm = 0.1683, lr_0 = 2.0464e-04
Loss = 2.5070e-03, PNorm = 171.8397, GNorm = 0.2678, lr_0 = 2.0450e-04
Loss = 2.2428e-03, PNorm = 171.8447, GNorm = 0.1352, lr_0 = 2.0436e-04
Loss = 2.6229e-03, PNorm = 171.8498, GNorm = 0.1065, lr_0 = 2.0422e-04
Loss = 3.0115e-03, PNorm = 171.8564, GNorm = 0.4211, lr_0 = 2.0408e-04
Loss = 2.0801e-03, PNorm = 171.8595, GNorm = 0.1513, lr_0 = 2.0394e-04
Loss = 3.0297e-03, PNorm = 171.8647, GNorm = 0.0537, lr_0 = 2.0380e-04
Loss = 3.1333e-03, PNorm = 171.8700, GNorm = 0.3763, lr_0 = 2.0366e-04
Loss = 1.8743e-03, PNorm = 171.8768, GNorm = 0.2323, lr_0 = 2.0352e-04
Loss = 2.2438e-03, PNorm = 171.8830, GNorm = 0.0374, lr_0 = 2.0338e-04
Loss = 4.1830e-03, PNorm = 171.8844, GNorm = 0.2488, lr_0 = 2.0324e-04
Loss = 2.6049e-03, PNorm = 171.8881, GNorm = 0.1587, lr_0 = 2.0310e-04
Loss = 2.7615e-03, PNorm = 171.8935, GNorm = 0.1930, lr_0 = 2.0296e-04
Loss = 1.7408e-03, PNorm = 171.8981, GNorm = 0.1211, lr_0 = 2.0282e-04
Loss = 2.1333e-03, PNorm = 171.9023, GNorm = 0.1303, lr_0 = 2.0268e-04
Loss = 2.1739e-03, PNorm = 171.9062, GNorm = 0.1496, lr_0 = 2.0254e-04
Loss = 3.1275e-03, PNorm = 171.9121, GNorm = 0.0961, lr_0 = 2.0240e-04
Loss = 3.0329e-03, PNorm = 171.9169, GNorm = 0.1347, lr_0 = 2.0227e-04
Loss = 1.8389e-03, PNorm = 171.9212, GNorm = 0.1630, lr_0 = 2.0213e-04
Loss = 1.9420e-03, PNorm = 171.9252, GNorm = 0.1126, lr_0 = 2.0199e-04
Loss = 2.1982e-03, PNorm = 171.9296, GNorm = 0.1116, lr_0 = 2.0185e-04
Loss = 2.9063e-03, PNorm = 171.9344, GNorm = 0.2158, lr_0 = 2.0171e-04
Loss = 2.5237e-03, PNorm = 171.9380, GNorm = 0.0925, lr_0 = 2.0157e-04
Loss = 3.1588e-03, PNorm = 171.9431, GNorm = 0.1058, lr_0 = 2.0144e-04
Loss = 2.1807e-03, PNorm = 171.9488, GNorm = 0.1008, lr_0 = 2.0130e-04
Loss = 2.5424e-03, PNorm = 171.9540, GNorm = 0.0779, lr_0 = 2.0116e-04
Loss = 1.8483e-03, PNorm = 171.9582, GNorm = 0.1292, lr_0 = 2.0102e-04
Loss = 3.8516e-03, PNorm = 171.9616, GNorm = 0.0969, lr_0 = 2.0088e-04
Loss = 2.7155e-03, PNorm = 171.9636, GNorm = 0.0981, lr_0 = 2.0075e-04
Loss = 2.0828e-03, PNorm = 171.9683, GNorm = 0.2200, lr_0 = 2.0061e-04
Loss = 3.1542e-03, PNorm = 171.9710, GNorm = 0.2462, lr_0 = 2.0047e-04
Loss = 2.4059e-03, PNorm = 171.9760, GNorm = 0.2070, lr_0 = 2.0033e-04
Loss = 2.0586e-03, PNorm = 171.9808, GNorm = 0.1688, lr_0 = 2.0020e-04
Loss = 2.7691e-03, PNorm = 171.9875, GNorm = 0.0982, lr_0 = 2.0006e-04
Loss = 3.5405e-03, PNorm = 171.9924, GNorm = 0.4248, lr_0 = 1.9992e-04
Loss = 2.0440e-03, PNorm = 171.9984, GNorm = 0.0871, lr_0 = 1.9979e-04
Loss = 2.3708e-03, PNorm = 172.0057, GNorm = 0.1057, lr_0 = 1.9965e-04
Loss = 3.0383e-03, PNorm = 172.0103, GNorm = 0.1725, lr_0 = 1.9951e-04
Loss = 2.2028e-03, PNorm = 172.0145, GNorm = 0.1447, lr_0 = 1.9938e-04
Loss = 3.4058e-03, PNorm = 172.0194, GNorm = 0.1119, lr_0 = 1.9924e-04
Loss = 2.6649e-03, PNorm = 172.0248, GNorm = 0.0991, lr_0 = 1.9910e-04
Loss = 2.9424e-03, PNorm = 172.0296, GNorm = 0.1692, lr_0 = 1.9897e-04
Loss = 2.4322e-03, PNorm = 172.0353, GNorm = 0.0716, lr_0 = 1.9883e-04
Loss = 2.9691e-03, PNorm = 172.0404, GNorm = 0.0720, lr_0 = 1.9869e-04
Loss = 1.8004e-03, PNorm = 172.0456, GNorm = 0.1040, lr_0 = 1.9856e-04
Loss = 2.6848e-03, PNorm = 172.0516, GNorm = 0.0681, lr_0 = 1.9842e-04
Loss = 2.6144e-03, PNorm = 172.0555, GNorm = 0.1341, lr_0 = 1.9829e-04
Loss = 4.2215e-03, PNorm = 172.0589, GNorm = 0.0554, lr_0 = 1.9815e-04
Loss = 3.9457e-03, PNorm = 172.0622, GNorm = 0.2129, lr_0 = 1.9801e-04
Loss = 2.3811e-03, PNorm = 172.0657, GNorm = 0.2088, lr_0 = 1.9788e-04
Loss = 4.2847e-03, PNorm = 172.0710, GNorm = 0.0938, lr_0 = 1.9774e-04
Loss = 2.3611e-03, PNorm = 172.0774, GNorm = 0.1106, lr_0 = 1.9761e-04
Loss = 2.8608e-03, PNorm = 172.0862, GNorm = 0.1733, lr_0 = 1.9747e-04
Loss = 1.8276e-03, PNorm = 172.0915, GNorm = 0.0558, lr_0 = 1.9734e-04
Loss = 3.0857e-03, PNorm = 172.0950, GNorm = 0.0514, lr_0 = 1.9720e-04
Loss = 1.7064e-03, PNorm = 172.0974, GNorm = 0.2154, lr_0 = 1.9707e-04
Loss = 2.5915e-03, PNorm = 172.0995, GNorm = 0.2129, lr_0 = 1.9693e-04
Loss = 2.4397e-03, PNorm = 172.1017, GNorm = 0.1314, lr_0 = 1.9680e-04
Loss = 3.0397e-03, PNorm = 172.1064, GNorm = 0.0793, lr_0 = 1.9666e-04
Loss = 2.3424e-03, PNorm = 172.1111, GNorm = 0.1220, lr_0 = 1.9653e-04
Loss = 1.9909e-03, PNorm = 172.1167, GNorm = 0.0578, lr_0 = 1.9639e-04
Loss = 1.7917e-03, PNorm = 172.1220, GNorm = 0.1266, lr_0 = 1.9626e-04
Loss = 2.1351e-03, PNorm = 172.1264, GNorm = 0.0766, lr_0 = 1.9612e-04
Loss = 2.6720e-03, PNorm = 172.1316, GNorm = 0.1628, lr_0 = 1.9599e-04
Loss = 4.7607e-03, PNorm = 172.1362, GNorm = 0.0847, lr_0 = 1.9585e-04
Loss = 2.7038e-03, PNorm = 172.1393, GNorm = 0.0977, lr_0 = 1.9572e-04
Loss = 2.5889e-03, PNorm = 172.1402, GNorm = 0.2576, lr_0 = 1.9559e-04
Loss = 2.8684e-03, PNorm = 172.1450, GNorm = 0.1841, lr_0 = 1.9545e-04
Loss = 2.3468e-03, PNorm = 172.1510, GNorm = 0.1196, lr_0 = 1.9532e-04
Loss = 3.0466e-03, PNorm = 172.1558, GNorm = 0.1280, lr_0 = 1.9518e-04
Loss = 2.8205e-03, PNorm = 172.1599, GNorm = 0.0866, lr_0 = 1.9505e-04
Loss = 3.4002e-03, PNorm = 172.1629, GNorm = 0.1863, lr_0 = 1.9492e-04
Loss = 3.0898e-03, PNorm = 172.1671, GNorm = 0.0668, lr_0 = 1.9478e-04
Loss = 2.9893e-03, PNorm = 172.1724, GNorm = 0.1212, lr_0 = 1.9465e-04
Loss = 5.8454e-03, PNorm = 172.1766, GNorm = 0.1180, lr_0 = 1.9452e-04
Loss = 2.7013e-03, PNorm = 172.1815, GNorm = 0.3531, lr_0 = 1.9438e-04
Loss = 2.2645e-03, PNorm = 172.1860, GNorm = 0.1257, lr_0 = 1.9425e-04
Loss = 3.7153e-03, PNorm = 172.1911, GNorm = 0.0625, lr_0 = 1.9412e-04
Loss = 5.1897e-03, PNorm = 172.1957, GNorm = 0.2347, lr_0 = 1.9398e-04
Loss = 2.1744e-03, PNorm = 172.2002, GNorm = 0.2564, lr_0 = 1.9385e-04
Loss = 2.0204e-03, PNorm = 172.2043, GNorm = 0.1544, lr_0 = 1.9372e-04
Loss = 4.3182e-03, PNorm = 172.2101, GNorm = 0.1139, lr_0 = 1.9359e-04
Loss = 2.2344e-03, PNorm = 172.2159, GNorm = 0.1185, lr_0 = 1.9345e-04
Loss = 4.0177e-03, PNorm = 172.2232, GNorm = 0.0803, lr_0 = 1.9332e-04
Loss = 2.0527e-03, PNorm = 172.2290, GNorm = 0.1645, lr_0 = 1.9319e-04
Loss = 2.0402e-03, PNorm = 172.2352, GNorm = 0.1237, lr_0 = 1.9306e-04
Validation mae = 0.277495
Epoch 22
Loss = 2.6041e-03, PNorm = 172.2393, GNorm = 0.2302, lr_0 = 1.9292e-04
Loss = 2.2415e-03, PNorm = 172.2429, GNorm = 0.1272, lr_0 = 1.9279e-04
Loss = 2.3824e-03, PNorm = 172.2467, GNorm = 0.1166, lr_0 = 1.9266e-04
Loss = 1.8543e-03, PNorm = 172.2521, GNorm = 0.0443, lr_0 = 1.9253e-04
Loss = 1.9114e-03, PNorm = 172.2553, GNorm = 0.1973, lr_0 = 1.9240e-04
Loss = 2.8491e-03, PNorm = 172.2569, GNorm = 0.3355, lr_0 = 1.9226e-04
Loss = 2.9660e-03, PNorm = 172.2592, GNorm = 0.2109, lr_0 = 1.9213e-04
Loss = 2.2678e-03, PNorm = 172.2617, GNorm = 0.2238, lr_0 = 1.9200e-04
Loss = 1.9697e-03, PNorm = 172.2642, GNorm = 0.3324, lr_0 = 1.9187e-04
Loss = 1.7883e-03, PNorm = 172.2661, GNorm = 0.1355, lr_0 = 1.9174e-04
Loss = 2.2702e-03, PNorm = 172.2694, GNorm = 0.4002, lr_0 = 1.9161e-04
Loss = 1.6232e-03, PNorm = 172.2713, GNorm = 0.1609, lr_0 = 1.9148e-04
Loss = 1.8057e-03, PNorm = 172.2731, GNorm = 0.1304, lr_0 = 1.9134e-04
Loss = 2.4595e-03, PNorm = 172.2771, GNorm = 0.1228, lr_0 = 1.9121e-04
Loss = 4.4266e-03, PNorm = 172.2804, GNorm = 0.1740, lr_0 = 1.9108e-04
Loss = 2.0290e-03, PNorm = 172.2823, GNorm = 0.1620, lr_0 = 1.9095e-04
Loss = 1.8089e-03, PNorm = 172.2851, GNorm = 0.0942, lr_0 = 1.9082e-04
Loss = 2.3713e-03, PNorm = 172.2873, GNorm = 0.1556, lr_0 = 1.9069e-04
Loss = 2.1163e-03, PNorm = 172.2910, GNorm = 0.3213, lr_0 = 1.9056e-04
Loss = 1.4418e-03, PNorm = 172.2949, GNorm = 0.1446, lr_0 = 1.9043e-04
Loss = 2.6021e-03, PNorm = 172.2979, GNorm = 0.1258, lr_0 = 1.9030e-04
Loss = 2.6282e-03, PNorm = 172.2988, GNorm = 0.0473, lr_0 = 1.9017e-04
Loss = 3.0438e-03, PNorm = 172.3005, GNorm = 0.1179, lr_0 = 1.9004e-04
Loss = 3.2494e-03, PNorm = 172.3047, GNorm = 0.1269, lr_0 = 1.8991e-04
Loss = 2.7464e-03, PNorm = 172.3112, GNorm = 0.0736, lr_0 = 1.8978e-04
Loss = 3.3555e-03, PNorm = 172.3139, GNorm = 0.0521, lr_0 = 1.8965e-04
Loss = 2.8833e-03, PNorm = 172.3168, GNorm = 0.0796, lr_0 = 1.8952e-04
Loss = 3.9553e-03, PNorm = 172.3203, GNorm = 0.2654, lr_0 = 1.8939e-04
Loss = 2.7600e-03, PNorm = 172.3239, GNorm = 0.0896, lr_0 = 1.8926e-04
Loss = 1.7486e-03, PNorm = 172.3284, GNorm = 0.0756, lr_0 = 1.8913e-04
Loss = 1.9826e-03, PNorm = 172.3335, GNorm = 0.2290, lr_0 = 1.8900e-04
Loss = 2.2222e-03, PNorm = 172.3375, GNorm = 0.2648, lr_0 = 1.8887e-04
Loss = 1.3477e-03, PNorm = 172.3409, GNorm = 0.1183, lr_0 = 1.8874e-04
Loss = 2.8654e-03, PNorm = 172.3431, GNorm = 0.1549, lr_0 = 1.8861e-04
Loss = 1.8419e-03, PNorm = 172.3440, GNorm = 0.6423, lr_0 = 1.8848e-04
Loss = 1.7657e-03, PNorm = 172.3447, GNorm = 0.0693, lr_0 = 1.8835e-04
Loss = 1.7738e-03, PNorm = 172.3484, GNorm = 0.1766, lr_0 = 1.8822e-04
Loss = 1.7300e-03, PNorm = 172.3512, GNorm = 0.0875, lr_0 = 1.8809e-04
Loss = 2.8903e-03, PNorm = 172.3546, GNorm = 0.2212, lr_0 = 1.8797e-04
Loss = 1.8674e-03, PNorm = 172.3599, GNorm = 0.0492, lr_0 = 1.8784e-04
Loss = 2.3354e-03, PNorm = 172.3646, GNorm = 0.0816, lr_0 = 1.8771e-04
Loss = 2.6002e-03, PNorm = 172.3686, GNorm = 0.1958, lr_0 = 1.8758e-04
Loss = 5.0107e-03, PNorm = 172.3726, GNorm = 0.0320, lr_0 = 1.8745e-04
Loss = 1.8573e-03, PNorm = 172.3754, GNorm = 0.0800, lr_0 = 1.8732e-04
Loss = 2.4401e-03, PNorm = 172.3773, GNorm = 0.1036, lr_0 = 1.8719e-04
Loss = 1.7930e-03, PNorm = 172.3807, GNorm = 0.1204, lr_0 = 1.8707e-04
Loss = 2.8962e-03, PNorm = 172.3847, GNorm = 0.1677, lr_0 = 1.8694e-04
Loss = 2.2859e-03, PNorm = 172.3864, GNorm = 0.0675, lr_0 = 1.8681e-04
Loss = 2.6343e-03, PNorm = 172.3905, GNorm = 0.2943, lr_0 = 1.8668e-04
Loss = 4.5257e-03, PNorm = 172.3943, GNorm = 0.1982, lr_0 = 1.8655e-04
Loss = 2.4977e-03, PNorm = 172.3998, GNorm = 0.1332, lr_0 = 1.8643e-04
Loss = 2.4410e-03, PNorm = 172.4041, GNorm = 0.2263, lr_0 = 1.8630e-04
Loss = 2.5404e-03, PNorm = 172.4096, GNorm = 0.0924, lr_0 = 1.8617e-04
Loss = 3.4996e-03, PNorm = 172.4122, GNorm = 0.0819, lr_0 = 1.8604e-04
Loss = 3.5791e-03, PNorm = 172.4154, GNorm = 0.0555, lr_0 = 1.8592e-04
Loss = 2.7725e-03, PNorm = 172.4195, GNorm = 0.1229, lr_0 = 1.8579e-04
Loss = 1.8381e-03, PNorm = 172.4236, GNorm = 0.0623, lr_0 = 1.8566e-04
Loss = 3.5194e-03, PNorm = 172.4239, GNorm = 0.1507, lr_0 = 1.8553e-04
Loss = 3.0962e-03, PNorm = 172.4271, GNorm = 0.0947, lr_0 = 1.8541e-04
Loss = 1.8351e-03, PNorm = 172.4329, GNorm = 0.2382, lr_0 = 1.8528e-04
Loss = 2.1794e-03, PNorm = 172.4369, GNorm = 0.1106, lr_0 = 1.8515e-04
Loss = 4.5534e-03, PNorm = 172.4406, GNorm = 0.2259, lr_0 = 1.8503e-04
Loss = 2.2155e-03, PNorm = 172.4453, GNorm = 0.1097, lr_0 = 1.8490e-04
Loss = 1.7775e-03, PNorm = 172.4503, GNorm = 0.1238, lr_0 = 1.8477e-04
Loss = 2.7768e-03, PNorm = 172.4552, GNorm = 0.2441, lr_0 = 1.8465e-04
Loss = 3.4678e-03, PNorm = 172.4599, GNorm = 0.0818, lr_0 = 1.8452e-04
Loss = 2.4135e-03, PNorm = 172.4641, GNorm = 0.1149, lr_0 = 1.8439e-04
Loss = 2.0182e-03, PNorm = 172.4684, GNorm = 0.1461, lr_0 = 1.8427e-04
Loss = 1.7185e-03, PNorm = 172.4737, GNorm = 0.1084, lr_0 = 1.8414e-04
Loss = 2.5496e-03, PNorm = 172.4777, GNorm = 0.2411, lr_0 = 1.8401e-04
Loss = 1.5003e-03, PNorm = 172.4809, GNorm = 0.1230, lr_0 = 1.8389e-04
Loss = 1.7697e-03, PNorm = 172.4834, GNorm = 0.1909, lr_0 = 1.8376e-04
Loss = 2.5788e-03, PNorm = 172.4882, GNorm = 0.1141, lr_0 = 1.8364e-04
Loss = 2.2071e-03, PNorm = 172.4933, GNorm = 0.1517, lr_0 = 1.8351e-04
Loss = 2.4587e-03, PNorm = 172.4974, GNorm = 0.1599, lr_0 = 1.8338e-04
Loss = 2.8291e-03, PNorm = 172.5020, GNorm = 0.1560, lr_0 = 1.8326e-04
Loss = 2.7553e-03, PNorm = 172.5075, GNorm = 0.1430, lr_0 = 1.8313e-04
Loss = 2.9486e-03, PNorm = 172.5122, GNorm = 0.0872, lr_0 = 1.8301e-04
Loss = 1.8378e-03, PNorm = 172.5176, GNorm = 0.1043, lr_0 = 1.8288e-04
Loss = 3.5667e-03, PNorm = 172.5194, GNorm = 0.1001, lr_0 = 1.8276e-04
Loss = 2.1199e-03, PNorm = 172.5227, GNorm = 0.0901, lr_0 = 1.8263e-04
Loss = 1.4529e-03, PNorm = 172.5256, GNorm = 0.0765, lr_0 = 1.8251e-04
Loss = 1.9379e-03, PNorm = 172.5286, GNorm = 0.1858, lr_0 = 1.8238e-04
Loss = 1.5837e-03, PNorm = 172.5335, GNorm = 0.0573, lr_0 = 1.8226e-04
Loss = 2.6410e-03, PNorm = 172.5378, GNorm = 0.3805, lr_0 = 1.8213e-04
Loss = 3.7189e-03, PNorm = 172.5408, GNorm = 0.0750, lr_0 = 1.8201e-04
Loss = 1.7131e-03, PNorm = 172.5436, GNorm = 0.1921, lr_0 = 1.8188e-04
Loss = 1.7507e-03, PNorm = 172.5462, GNorm = 0.1905, lr_0 = 1.8176e-04
Loss = 1.7128e-03, PNorm = 172.5498, GNorm = 0.1269, lr_0 = 1.8163e-04
Loss = 2.0490e-03, PNorm = 172.5538, GNorm = 0.2437, lr_0 = 1.8151e-04
Loss = 3.1381e-03, PNorm = 172.5572, GNorm = 0.1944, lr_0 = 1.8138e-04
Loss = 3.1289e-03, PNorm = 172.5617, GNorm = 0.0567, lr_0 = 1.8126e-04
Loss = 3.5657e-03, PNorm = 172.5638, GNorm = 0.1373, lr_0 = 1.8114e-04
Loss = 1.5969e-03, PNorm = 172.5668, GNorm = 0.0965, lr_0 = 1.8101e-04
Loss = 1.6545e-03, PNorm = 172.5727, GNorm = 0.0949, lr_0 = 1.8089e-04
Loss = 2.7029e-03, PNorm = 172.5758, GNorm = 0.1779, lr_0 = 1.8076e-04
Loss = 1.3682e-03, PNorm = 172.5803, GNorm = 0.1606, lr_0 = 1.8064e-04
Loss = 1.7811e-03, PNorm = 172.5827, GNorm = 0.1957, lr_0 = 1.8052e-04
Loss = 2.3770e-03, PNorm = 172.5872, GNorm = 0.1041, lr_0 = 1.8039e-04
Loss = 1.6192e-03, PNorm = 172.5889, GNorm = 0.1010, lr_0 = 1.8027e-04
Loss = 2.0471e-03, PNorm = 172.5926, GNorm = 0.0677, lr_0 = 1.8015e-04
Loss = 1.4304e-03, PNorm = 172.5959, GNorm = 0.1283, lr_0 = 1.8002e-04
Loss = 2.4373e-03, PNorm = 172.5998, GNorm = 0.1050, lr_0 = 1.7990e-04
Loss = 3.6645e-03, PNorm = 172.6024, GNorm = 0.2020, lr_0 = 1.7978e-04
Loss = 2.7033e-03, PNorm = 172.6062, GNorm = 0.0834, lr_0 = 1.7965e-04
Loss = 2.2736e-03, PNorm = 172.6076, GNorm = 0.0982, lr_0 = 1.7953e-04
Loss = 1.4881e-03, PNorm = 172.6118, GNorm = 0.0499, lr_0 = 1.7941e-04
Loss = 1.8124e-03, PNorm = 172.6157, GNorm = 0.1467, lr_0 = 1.7928e-04
Loss = 1.7308e-03, PNorm = 172.6192, GNorm = 0.1758, lr_0 = 1.7916e-04
Loss = 1.8894e-03, PNorm = 172.6229, GNorm = 0.0948, lr_0 = 1.7904e-04
Loss = 3.9269e-03, PNorm = 172.6231, GNorm = 0.0701, lr_0 = 1.7892e-04
Loss = 1.7446e-03, PNorm = 172.6250, GNorm = 0.1075, lr_0 = 1.7879e-04
Loss = 2.3805e-03, PNorm = 172.6280, GNorm = 0.0711, lr_0 = 1.7867e-04
Loss = 1.8271e-03, PNorm = 172.6316, GNorm = 0.1512, lr_0 = 1.7855e-04
Loss = 3.0185e-03, PNorm = 172.6347, GNorm = 0.0841, lr_0 = 1.7843e-04
Loss = 1.9196e-03, PNorm = 172.6376, GNorm = 0.1740, lr_0 = 1.7830e-04
Loss = 2.4086e-03, PNorm = 172.6396, GNorm = 0.1276, lr_0 = 1.7818e-04
Loss = 1.5964e-03, PNorm = 172.6434, GNorm = 0.2187, lr_0 = 1.7806e-04
Loss = 1.3705e-03, PNorm = 172.6469, GNorm = 0.0595, lr_0 = 1.7794e-04
Loss = 3.6429e-03, PNorm = 172.6521, GNorm = 0.1458, lr_0 = 1.7782e-04
Validation mae = 0.277899
Epoch 23
Loss = 2.5551e-03, PNorm = 172.6555, GNorm = 0.1423, lr_0 = 1.7769e-04
Loss = 3.1571e-03, PNorm = 172.6578, GNorm = 0.1710, lr_0 = 1.7757e-04
Loss = 1.6914e-03, PNorm = 172.6603, GNorm = 0.0457, lr_0 = 1.7745e-04
Loss = 2.1796e-03, PNorm = 172.6616, GNorm = 0.0995, lr_0 = 1.7733e-04
Loss = 2.0578e-03, PNorm = 172.6633, GNorm = 0.0798, lr_0 = 1.7721e-04
Loss = 1.9082e-03, PNorm = 172.6655, GNorm = 0.1056, lr_0 = 1.7709e-04
Loss = 2.0773e-03, PNorm = 172.6705, GNorm = 0.2664, lr_0 = 1.7696e-04
Loss = 2.5250e-03, PNorm = 172.6740, GNorm = 0.1212, lr_0 = 1.7684e-04
Loss = 1.2406e-03, PNorm = 172.6786, GNorm = 0.1372, lr_0 = 1.7672e-04
Loss = 2.1936e-03, PNorm = 172.6808, GNorm = 0.0895, lr_0 = 1.7660e-04
Loss = 3.2515e-03, PNorm = 172.6828, GNorm = 0.0513, lr_0 = 1.7648e-04
Loss = 1.6339e-03, PNorm = 172.6849, GNorm = 0.1561, lr_0 = 1.7636e-04
Loss = 1.2740e-03, PNorm = 172.6886, GNorm = 0.0789, lr_0 = 1.7624e-04
Loss = 1.9348e-03, PNorm = 172.6933, GNorm = 0.0417, lr_0 = 1.7612e-04
Loss = 1.3366e-03, PNorm = 172.6944, GNorm = 0.0481, lr_0 = 1.7600e-04
Loss = 1.7743e-03, PNorm = 172.6964, GNorm = 0.1068, lr_0 = 1.7588e-04
Loss = 4.2294e-03, PNorm = 172.6989, GNorm = 0.0567, lr_0 = 1.7576e-04
Loss = 1.3659e-03, PNorm = 172.7012, GNorm = 0.1177, lr_0 = 1.7564e-04
Loss = 1.8059e-03, PNorm = 172.7033, GNorm = 0.1721, lr_0 = 1.7552e-04
Loss = 1.2276e-03, PNorm = 172.7051, GNorm = 0.0987, lr_0 = 1.7540e-04
Loss = 2.4036e-03, PNorm = 172.7087, GNorm = 0.0766, lr_0 = 1.7528e-04
Loss = 2.9742e-03, PNorm = 172.7112, GNorm = 0.1706, lr_0 = 1.7516e-04
Loss = 1.7169e-03, PNorm = 172.7149, GNorm = 0.1116, lr_0 = 1.7504e-04
Loss = 1.8796e-03, PNorm = 172.7183, GNorm = 0.1946, lr_0 = 1.7492e-04
Loss = 2.0468e-03, PNorm = 172.7210, GNorm = 0.0905, lr_0 = 1.7480e-04
Loss = 1.4597e-03, PNorm = 172.7236, GNorm = 0.1688, lr_0 = 1.7468e-04
Loss = 1.9803e-03, PNorm = 172.7265, GNorm = 0.1145, lr_0 = 1.7456e-04
Loss = 1.3735e-03, PNorm = 172.7289, GNorm = 0.2070, lr_0 = 1.7444e-04
Loss = 2.2976e-03, PNorm = 172.7303, GNorm = 0.1259, lr_0 = 1.7432e-04
Loss = 1.4972e-03, PNorm = 172.7334, GNorm = 0.1881, lr_0 = 1.7420e-04
Loss = 3.4899e-03, PNorm = 172.7359, GNorm = 0.0965, lr_0 = 1.7408e-04
Loss = 1.4530e-03, PNorm = 172.7386, GNorm = 0.2561, lr_0 = 1.7396e-04
Loss = 1.8325e-03, PNorm = 172.7413, GNorm = 0.3121, lr_0 = 1.7384e-04
Loss = 2.5004e-03, PNorm = 172.7454, GNorm = 0.1818, lr_0 = 1.7372e-04
Loss = 1.9080e-03, PNorm = 172.7487, GNorm = 0.1295, lr_0 = 1.7360e-04
Loss = 4.6029e-03, PNorm = 172.7506, GNorm = 0.1259, lr_0 = 1.7348e-04
Loss = 2.4706e-03, PNorm = 172.7526, GNorm = 0.0424, lr_0 = 1.7336e-04
Loss = 1.3778e-03, PNorm = 172.7566, GNorm = 0.1088, lr_0 = 1.7325e-04
Loss = 2.6741e-03, PNorm = 172.7582, GNorm = 0.1756, lr_0 = 1.7313e-04
Loss = 2.7898e-03, PNorm = 172.7608, GNorm = 0.2269, lr_0 = 1.7301e-04
Loss = 1.6412e-03, PNorm = 172.7637, GNorm = 0.0490, lr_0 = 1.7289e-04
Loss = 3.2988e-03, PNorm = 172.7681, GNorm = 0.4102, lr_0 = 1.7277e-04
Loss = 1.4931e-03, PNorm = 172.7728, GNorm = 0.1819, lr_0 = 1.7265e-04
Loss = 3.5515e-03, PNorm = 172.7768, GNorm = 0.1709, lr_0 = 1.7253e-04
Loss = 2.4652e-03, PNorm = 172.7816, GNorm = 0.2064, lr_0 = 1.7242e-04
Loss = 2.5235e-03, PNorm = 172.7819, GNorm = 0.1759, lr_0 = 1.7230e-04
Loss = 2.1903e-03, PNorm = 172.7818, GNorm = 0.0945, lr_0 = 1.7218e-04
Loss = 1.7415e-03, PNorm = 172.7849, GNorm = 0.1386, lr_0 = 1.7206e-04
Loss = 2.3347e-03, PNorm = 172.7879, GNorm = 0.1349, lr_0 = 1.7194e-04
Loss = 1.8024e-03, PNorm = 172.7905, GNorm = 0.1858, lr_0 = 1.7183e-04
Loss = 2.0557e-03, PNorm = 172.7953, GNorm = 0.1741, lr_0 = 1.7171e-04
Loss = 1.3433e-03, PNorm = 172.8004, GNorm = 0.2138, lr_0 = 1.7159e-04
Loss = 2.2805e-03, PNorm = 172.8035, GNorm = 0.2546, lr_0 = 1.7147e-04
Loss = 1.7143e-03, PNorm = 172.8064, GNorm = 0.1856, lr_0 = 1.7136e-04
Loss = 1.8755e-03, PNorm = 172.8104, GNorm = 0.0681, lr_0 = 1.7124e-04
Loss = 2.7140e-03, PNorm = 172.8122, GNorm = 0.0972, lr_0 = 1.7112e-04
Loss = 1.5443e-03, PNorm = 172.8155, GNorm = 0.1226, lr_0 = 1.7100e-04
Loss = 3.2962e-03, PNorm = 172.8205, GNorm = 0.1263, lr_0 = 1.7089e-04
Loss = 2.3920e-03, PNorm = 172.8244, GNorm = 0.0768, lr_0 = 1.7077e-04
Loss = 1.7464e-03, PNorm = 172.8272, GNorm = 0.0847, lr_0 = 1.7065e-04
Loss = 1.2537e-03, PNorm = 172.8292, GNorm = 0.1550, lr_0 = 1.7054e-04
Loss = 1.8266e-03, PNorm = 172.8326, GNorm = 0.1523, lr_0 = 1.7042e-04
Loss = 1.4700e-03, PNorm = 172.8340, GNorm = 0.0657, lr_0 = 1.7030e-04
Loss = 1.5109e-03, PNorm = 172.8362, GNorm = 0.0854, lr_0 = 1.7019e-04
Loss = 1.9409e-03, PNorm = 172.8385, GNorm = 0.0628, lr_0 = 1.7007e-04
Loss = 2.1338e-03, PNorm = 172.8417, GNorm = 0.0935, lr_0 = 1.6995e-04
Loss = 1.5352e-03, PNorm = 172.8448, GNorm = 0.1516, lr_0 = 1.6984e-04
Loss = 1.3962e-03, PNorm = 172.8483, GNorm = 0.0776, lr_0 = 1.6972e-04
Loss = 2.3779e-03, PNorm = 172.8525, GNorm = 0.0968, lr_0 = 1.6960e-04
Loss = 4.0284e-03, PNorm = 172.8575, GNorm = 0.9408, lr_0 = 1.6949e-04
Loss = 1.7153e-03, PNorm = 172.8605, GNorm = 0.3034, lr_0 = 1.6937e-04
Loss = 1.4188e-03, PNorm = 172.8639, GNorm = 0.1466, lr_0 = 1.6926e-04
Loss = 2.5867e-03, PNorm = 172.8655, GNorm = 0.2318, lr_0 = 1.6914e-04
Loss = 1.4497e-03, PNorm = 172.8714, GNorm = 0.0676, lr_0 = 1.6902e-04
Loss = 1.7442e-03, PNorm = 172.8757, GNorm = 0.0416, lr_0 = 1.6891e-04
Loss = 1.8464e-03, PNorm = 172.8808, GNorm = 0.1788, lr_0 = 1.6879e-04
Loss = 1.3129e-03, PNorm = 172.8861, GNorm = 0.2022, lr_0 = 1.6868e-04
Loss = 2.1023e-03, PNorm = 172.8904, GNorm = 0.0838, lr_0 = 1.6856e-04
Loss = 2.4294e-03, PNorm = 172.8944, GNorm = 0.0463, lr_0 = 1.6845e-04
Loss = 1.4611e-03, PNorm = 172.8985, GNorm = 0.1015, lr_0 = 1.6833e-04
Loss = 1.5545e-03, PNorm = 172.9018, GNorm = 0.0633, lr_0 = 1.6821e-04
Loss = 2.7348e-03, PNorm = 172.9038, GNorm = 0.0886, lr_0 = 1.6810e-04
Loss = 3.1360e-03, PNorm = 172.9063, GNorm = 0.0997, lr_0 = 1.6798e-04
Loss = 1.9788e-03, PNorm = 172.9079, GNorm = 0.0777, lr_0 = 1.6787e-04
Loss = 1.9092e-03, PNorm = 172.9124, GNorm = 0.0352, lr_0 = 1.6775e-04
Loss = 3.6080e-03, PNorm = 172.9168, GNorm = 0.4489, lr_0 = 1.6764e-04
Loss = 2.8049e-03, PNorm = 172.9201, GNorm = 0.1145, lr_0 = 1.6752e-04
Loss = 1.5841e-03, PNorm = 172.9233, GNorm = 0.2141, lr_0 = 1.6741e-04
Loss = 1.8140e-03, PNorm = 172.9266, GNorm = 0.0951, lr_0 = 1.6729e-04
Loss = 2.2960e-03, PNorm = 172.9304, GNorm = 0.0918, lr_0 = 1.6718e-04
Loss = 2.3440e-03, PNorm = 172.9341, GNorm = 0.1649, lr_0 = 1.6707e-04
Loss = 1.8904e-03, PNorm = 172.9365, GNorm = 0.2373, lr_0 = 1.6695e-04
Loss = 1.5912e-03, PNorm = 172.9400, GNorm = 0.2265, lr_0 = 1.6684e-04
Loss = 3.4528e-03, PNorm = 172.9426, GNorm = 0.0924, lr_0 = 1.6672e-04
Loss = 1.8601e-03, PNorm = 172.9444, GNorm = 0.1088, lr_0 = 1.6661e-04
Loss = 1.4072e-03, PNorm = 172.9469, GNorm = 0.0697, lr_0 = 1.6649e-04
Loss = 2.5294e-03, PNorm = 172.9492, GNorm = 0.3028, lr_0 = 1.6638e-04
Loss = 1.6392e-03, PNorm = 172.9537, GNorm = 0.2006, lr_0 = 1.6627e-04
Loss = 4.7064e-03, PNorm = 172.9587, GNorm = 0.1489, lr_0 = 1.6615e-04
Loss = 2.7283e-03, PNorm = 172.9629, GNorm = 0.0608, lr_0 = 1.6604e-04
Loss = 5.1389e-03, PNorm = 172.9645, GNorm = 0.1454, lr_0 = 1.6592e-04
Loss = 1.9847e-03, PNorm = 172.9692, GNorm = 0.2717, lr_0 = 1.6581e-04
Loss = 2.3671e-03, PNorm = 172.9731, GNorm = 0.1804, lr_0 = 1.6570e-04
Loss = 1.6572e-03, PNorm = 172.9778, GNorm = 0.1775, lr_0 = 1.6558e-04
Loss = 2.6169e-03, PNorm = 172.9830, GNorm = 0.1700, lr_0 = 1.6547e-04
Loss = 1.5592e-03, PNorm = 172.9863, GNorm = 0.0943, lr_0 = 1.6536e-04
Loss = 1.3044e-03, PNorm = 172.9905, GNorm = 0.0453, lr_0 = 1.6524e-04
Loss = 2.2273e-03, PNorm = 172.9908, GNorm = 0.1092, lr_0 = 1.6513e-04
Loss = 3.3372e-03, PNorm = 172.9937, GNorm = 0.3172, lr_0 = 1.6502e-04
Loss = 1.4819e-03, PNorm = 172.9986, GNorm = 0.1880, lr_0 = 1.6490e-04
Loss = 2.0064e-03, PNorm = 173.0040, GNorm = 0.1022, lr_0 = 1.6479e-04
Loss = 1.6821e-03, PNorm = 173.0081, GNorm = 0.1416, lr_0 = 1.6468e-04
Loss = 1.3968e-03, PNorm = 173.0109, GNorm = 0.1865, lr_0 = 1.6457e-04
Loss = 1.3757e-03, PNorm = 173.0150, GNorm = 0.1831, lr_0 = 1.6445e-04
Loss = 1.5287e-03, PNorm = 173.0192, GNorm = 0.1412, lr_0 = 1.6434e-04
Loss = 4.1430e-03, PNorm = 173.0227, GNorm = 0.0509, lr_0 = 1.6423e-04
Loss = 1.5115e-03, PNorm = 173.0273, GNorm = 0.0867, lr_0 = 1.6412e-04
Loss = 2.7750e-03, PNorm = 173.0312, GNorm = 0.1083, lr_0 = 1.6400e-04
Loss = 2.2475e-03, PNorm = 173.0341, GNorm = 0.1960, lr_0 = 1.6389e-04
Loss = 1.4073e-03, PNorm = 173.0382, GNorm = 0.0959, lr_0 = 1.6378e-04
Validation mae = 0.277817
Epoch 24
Loss = 1.2063e-03, PNorm = 173.0418, GNorm = 0.0311, lr_0 = 1.6367e-04
Loss = 1.9142e-03, PNorm = 173.0437, GNorm = 0.1009, lr_0 = 1.6355e-04
Loss = 2.9331e-03, PNorm = 173.0449, GNorm = 0.1262, lr_0 = 1.6344e-04
Loss = 1.5053e-03, PNorm = 173.0468, GNorm = 0.0734, lr_0 = 1.6333e-04
Loss = 1.7451e-03, PNorm = 173.0477, GNorm = 0.0771, lr_0 = 1.6322e-04
Loss = 2.1923e-03, PNorm = 173.0502, GNorm = 0.1454, lr_0 = 1.6311e-04
Loss = 2.0020e-03, PNorm = 173.0515, GNorm = 0.0631, lr_0 = 1.6299e-04
Loss = 1.2984e-03, PNorm = 173.0538, GNorm = 0.0904, lr_0 = 1.6288e-04
Loss = 2.7482e-03, PNorm = 173.0552, GNorm = 0.1220, lr_0 = 1.6277e-04
Loss = 1.5189e-03, PNorm = 173.0550, GNorm = 0.1869, lr_0 = 1.6266e-04
Loss = 2.5272e-03, PNorm = 173.0567, GNorm = 0.0454, lr_0 = 1.6255e-04
Loss = 1.7054e-03, PNorm = 173.0587, GNorm = 0.1184, lr_0 = 1.6244e-04
Loss = 1.5998e-03, PNorm = 173.0609, GNorm = 0.1136, lr_0 = 1.6233e-04
Loss = 2.3195e-03, PNorm = 173.0618, GNorm = 0.2916, lr_0 = 1.6221e-04
Loss = 1.4941e-03, PNorm = 173.0634, GNorm = 0.1211, lr_0 = 1.6210e-04
Loss = 2.3389e-03, PNorm = 173.0655, GNorm = 0.0477, lr_0 = 1.6199e-04
Loss = 1.2551e-03, PNorm = 173.0674, GNorm = 0.1316, lr_0 = 1.6188e-04
Loss = 1.4352e-03, PNorm = 173.0700, GNorm = 0.1619, lr_0 = 1.6177e-04
Loss = 3.7582e-03, PNorm = 173.0720, GNorm = 0.2021, lr_0 = 1.6166e-04
Loss = 1.1413e-03, PNorm = 173.0735, GNorm = 0.2726, lr_0 = 1.6155e-04
Loss = 1.8159e-03, PNorm = 173.0748, GNorm = 0.1863, lr_0 = 1.6144e-04
Loss = 1.1341e-03, PNorm = 173.0762, GNorm = 0.1524, lr_0 = 1.6133e-04
Loss = 1.4050e-03, PNorm = 173.0786, GNorm = 0.2627, lr_0 = 1.6122e-04
Loss = 1.8793e-03, PNorm = 173.0809, GNorm = 0.0727, lr_0 = 1.6111e-04
Loss = 3.0260e-03, PNorm = 173.0834, GNorm = 0.1680, lr_0 = 1.6100e-04
Loss = 1.2143e-03, PNorm = 173.0865, GNorm = 0.0720, lr_0 = 1.6089e-04
Loss = 1.1689e-03, PNorm = 173.0917, GNorm = 0.0971, lr_0 = 1.6078e-04
Loss = 3.2022e-03, PNorm = 173.0967, GNorm = 0.1585, lr_0 = 1.6067e-04
Loss = 1.4077e-03, PNorm = 173.0998, GNorm = 0.0345, lr_0 = 1.6056e-04
Loss = 1.1173e-03, PNorm = 173.1019, GNorm = 0.0443, lr_0 = 1.6045e-04
Loss = 1.5024e-03, PNorm = 173.1044, GNorm = 0.0621, lr_0 = 1.6034e-04
Loss = 1.1591e-03, PNorm = 173.1072, GNorm = 0.0583, lr_0 = 1.6023e-04
Loss = 2.0685e-03, PNorm = 173.1099, GNorm = 0.0911, lr_0 = 1.6012e-04
Loss = 1.5603e-03, PNorm = 173.1118, GNorm = 0.1024, lr_0 = 1.6001e-04
Loss = 1.3413e-03, PNorm = 173.1151, GNorm = 0.1822, lr_0 = 1.5990e-04
Loss = 1.8990e-03, PNorm = 173.1158, GNorm = 0.0901, lr_0 = 1.5979e-04
Loss = 1.2758e-03, PNorm = 173.1193, GNorm = 0.0865, lr_0 = 1.5968e-04
Loss = 1.3671e-03, PNorm = 173.1232, GNorm = 0.1289, lr_0 = 1.5957e-04
Loss = 2.5199e-03, PNorm = 173.1259, GNorm = 0.0816, lr_0 = 1.5946e-04
Loss = 1.2308e-03, PNorm = 173.1287, GNorm = 0.0437, lr_0 = 1.5935e-04
Loss = 2.3064e-03, PNorm = 173.1315, GNorm = 0.0873, lr_0 = 1.5924e-04
Loss = 2.0895e-03, PNorm = 173.1340, GNorm = 0.0893, lr_0 = 1.5913e-04
Loss = 1.8986e-03, PNorm = 173.1371, GNorm = 0.1319, lr_0 = 1.5902e-04
Loss = 2.8791e-03, PNorm = 173.1408, GNorm = 0.1618, lr_0 = 1.5891e-04
Loss = 1.6943e-03, PNorm = 173.1448, GNorm = 0.0944, lr_0 = 1.5880e-04
Loss = 1.2425e-03, PNorm = 173.1484, GNorm = 0.0638, lr_0 = 1.5870e-04
Loss = 1.4694e-03, PNorm = 173.1508, GNorm = 0.2283, lr_0 = 1.5859e-04
Loss = 4.3559e-03, PNorm = 173.1510, GNorm = 0.0511, lr_0 = 1.5848e-04
Loss = 2.3659e-03, PNorm = 173.1520, GNorm = 0.0449, lr_0 = 1.5837e-04
Loss = 1.0223e-03, PNorm = 173.1536, GNorm = 0.1501, lr_0 = 1.5826e-04
Loss = 1.1907e-03, PNorm = 173.1567, GNorm = 0.0378, lr_0 = 1.5815e-04
Loss = 1.3869e-03, PNorm = 173.1586, GNorm = 0.0491, lr_0 = 1.5804e-04
Loss = 1.4348e-03, PNorm = 173.1601, GNorm = 0.1376, lr_0 = 1.5794e-04
Loss = 1.1136e-03, PNorm = 173.1626, GNorm = 0.0511, lr_0 = 1.5783e-04
Loss = 1.9575e-03, PNorm = 173.1632, GNorm = 0.0396, lr_0 = 1.5772e-04
Loss = 2.8444e-03, PNorm = 173.1652, GNorm = 0.1455, lr_0 = 1.5761e-04
Loss = 1.5868e-03, PNorm = 173.1686, GNorm = 0.1006, lr_0 = 1.5750e-04
Loss = 1.8112e-03, PNorm = 173.1713, GNorm = 0.1136, lr_0 = 1.5740e-04
Loss = 2.4770e-03, PNorm = 173.1729, GNorm = 0.0838, lr_0 = 1.5729e-04
Loss = 2.1564e-03, PNorm = 173.1752, GNorm = 0.1320, lr_0 = 1.5718e-04
Loss = 1.4568e-03, PNorm = 173.1785, GNorm = 0.0879, lr_0 = 1.5707e-04
Loss = 3.3167e-03, PNorm = 173.1838, GNorm = 0.0637, lr_0 = 1.5697e-04
Loss = 1.1582e-03, PNorm = 173.1876, GNorm = 0.0960, lr_0 = 1.5686e-04
Loss = 1.9969e-03, PNorm = 173.1915, GNorm = 0.1746, lr_0 = 1.5675e-04
Loss = 2.0883e-03, PNorm = 173.1961, GNorm = 0.1108, lr_0 = 1.5664e-04
Loss = 2.5726e-03, PNorm = 173.2012, GNorm = 0.1378, lr_0 = 1.5654e-04
Loss = 1.7944e-03, PNorm = 173.2046, GNorm = 0.0954, lr_0 = 1.5643e-04
Loss = 1.5497e-03, PNorm = 173.2073, GNorm = 0.2860, lr_0 = 1.5632e-04
Loss = 1.6887e-03, PNorm = 173.2098, GNorm = 0.0733, lr_0 = 1.5621e-04
Loss = 2.1056e-03, PNorm = 173.2120, GNorm = 0.1878, lr_0 = 1.5611e-04
Loss = 4.3507e-03, PNorm = 173.2153, GNorm = 0.3750, lr_0 = 1.5600e-04
Loss = 1.9393e-03, PNorm = 173.2150, GNorm = 0.0998, lr_0 = 1.5589e-04
Loss = 1.6765e-03, PNorm = 173.2160, GNorm = 0.0927, lr_0 = 1.5579e-04
Loss = 1.4014e-03, PNorm = 173.2187, GNorm = 0.0891, lr_0 = 1.5568e-04
Loss = 1.4184e-03, PNorm = 173.2213, GNorm = 0.0494, lr_0 = 1.5557e-04
Loss = 1.2939e-03, PNorm = 173.2236, GNorm = 0.0733, lr_0 = 1.5547e-04
Loss = 2.2730e-03, PNorm = 173.2270, GNorm = 0.0856, lr_0 = 1.5536e-04
Loss = 1.8935e-03, PNorm = 173.2279, GNorm = 0.0630, lr_0 = 1.5525e-04
Loss = 4.1356e-03, PNorm = 173.2291, GNorm = 0.1091, lr_0 = 1.5515e-04
Loss = 1.0189e-03, PNorm = 173.2308, GNorm = 0.1703, lr_0 = 1.5504e-04
Loss = 1.5638e-03, PNorm = 173.2346, GNorm = 0.1268, lr_0 = 1.5493e-04
Loss = 1.1870e-03, PNorm = 173.2389, GNorm = 0.0837, lr_0 = 1.5483e-04
Loss = 1.0864e-03, PNorm = 173.2427, GNorm = 0.0722, lr_0 = 1.5472e-04
Loss = 1.7444e-03, PNorm = 173.2453, GNorm = 0.0534, lr_0 = 1.5462e-04
Loss = 3.0777e-03, PNorm = 173.2479, GNorm = 0.1771, lr_0 = 1.5451e-04
Loss = 1.6405e-03, PNorm = 173.2503, GNorm = 0.0530, lr_0 = 1.5440e-04
Loss = 1.6350e-03, PNorm = 173.2537, GNorm = 0.0663, lr_0 = 1.5430e-04
Loss = 1.1952e-03, PNorm = 173.2579, GNorm = 0.0795, lr_0 = 1.5419e-04
Loss = 2.2941e-03, PNorm = 173.2611, GNorm = 0.0525, lr_0 = 1.5409e-04
Loss = 1.8573e-03, PNorm = 173.2631, GNorm = 0.0567, lr_0 = 1.5398e-04
Loss = 1.3122e-03, PNorm = 173.2642, GNorm = 0.0567, lr_0 = 1.5388e-04
Loss = 1.4254e-03, PNorm = 173.2655, GNorm = 0.0607, lr_0 = 1.5377e-04
Loss = 2.8127e-03, PNorm = 173.2692, GNorm = 0.0776, lr_0 = 1.5367e-04
Loss = 2.3605e-03, PNorm = 173.2719, GNorm = 0.1307, lr_0 = 1.5356e-04
Loss = 1.6311e-03, PNorm = 173.2751, GNorm = 0.0281, lr_0 = 1.5346e-04
Loss = 1.5185e-03, PNorm = 173.2782, GNorm = 0.1143, lr_0 = 1.5335e-04
Loss = 1.7668e-03, PNorm = 173.2799, GNorm = 0.0759, lr_0 = 1.5325e-04
Loss = 1.2794e-03, PNorm = 173.2811, GNorm = 0.1202, lr_0 = 1.5314e-04
Loss = 1.6495e-03, PNorm = 173.2839, GNorm = 0.1452, lr_0 = 1.5304e-04
Loss = 5.5791e-03, PNorm = 173.2869, GNorm = 0.1350, lr_0 = 1.5293e-04
Loss = 2.4952e-03, PNorm = 173.2892, GNorm = 0.0619, lr_0 = 1.5283e-04
Loss = 2.3459e-03, PNorm = 173.2918, GNorm = 0.1000, lr_0 = 1.5272e-04
Loss = 1.2912e-03, PNorm = 173.2950, GNorm = 0.1714, lr_0 = 1.5262e-04
Loss = 1.1317e-03, PNorm = 173.2983, GNorm = 0.1335, lr_0 = 1.5251e-04
Loss = 1.9189e-03, PNorm = 173.3009, GNorm = 0.0466, lr_0 = 1.5241e-04
Loss = 1.8004e-03, PNorm = 173.3016, GNorm = 0.1102, lr_0 = 1.5230e-04
Loss = 1.6565e-03, PNorm = 173.3047, GNorm = 0.0847, lr_0 = 1.5220e-04
Loss = 1.5935e-03, PNorm = 173.3077, GNorm = 0.0890, lr_0 = 1.5209e-04
Loss = 1.0726e-03, PNorm = 173.3112, GNorm = 0.1521, lr_0 = 1.5199e-04
Loss = 2.8319e-03, PNorm = 173.3125, GNorm = 0.4679, lr_0 = 1.5189e-04
Loss = 1.7251e-03, PNorm = 173.3136, GNorm = 0.1352, lr_0 = 1.5178e-04
Loss = 1.9327e-03, PNorm = 173.3150, GNorm = 0.0408, lr_0 = 1.5168e-04
Loss = 2.4834e-03, PNorm = 173.3174, GNorm = 0.2072, lr_0 = 1.5157e-04
Loss = 1.9176e-03, PNorm = 173.3217, GNorm = 0.0609, lr_0 = 1.5147e-04
Loss = 1.3329e-03, PNorm = 173.3272, GNorm = 0.0426, lr_0 = 1.5137e-04
Loss = 1.1726e-03, PNorm = 173.3316, GNorm = 0.1722, lr_0 = 1.5126e-04
Loss = 1.4277e-03, PNorm = 173.3342, GNorm = 0.1550, lr_0 = 1.5116e-04
Loss = 2.2532e-03, PNorm = 173.3363, GNorm = 0.1434, lr_0 = 1.5106e-04
Loss = 2.6181e-03, PNorm = 173.3391, GNorm = 0.0810, lr_0 = 1.5095e-04
Loss = 2.7987e-03, PNorm = 173.3409, GNorm = 0.4094, lr_0 = 1.5085e-04
Validation mae = 0.277993
Epoch 25
Loss = 1.8967e-03, PNorm = 173.3427, GNorm = 0.0845, lr_0 = 1.5075e-04
Loss = 1.0560e-03, PNorm = 173.3436, GNorm = 0.1444, lr_0 = 1.5064e-04
Loss = 1.8288e-03, PNorm = 173.3433, GNorm = 0.1670, lr_0 = 1.5054e-04
Loss = 8.9059e-04, PNorm = 173.3439, GNorm = 0.0625, lr_0 = 1.5044e-04
Loss = 1.9730e-03, PNorm = 173.3461, GNorm = 0.0575, lr_0 = 1.5033e-04
Loss = 1.4947e-03, PNorm = 173.3486, GNorm = 0.0875, lr_0 = 1.5023e-04
Loss = 1.7695e-03, PNorm = 173.3504, GNorm = 0.2339, lr_0 = 1.5013e-04
Loss = 1.9989e-03, PNorm = 173.3527, GNorm = 0.0480, lr_0 = 1.5002e-04
Loss = 1.1002e-03, PNorm = 173.3540, GNorm = 0.1399, lr_0 = 1.4992e-04
Loss = 1.6930e-03, PNorm = 173.3565, GNorm = 0.0928, lr_0 = 1.4982e-04
Loss = 2.5211e-03, PNorm = 173.3598, GNorm = 0.1712, lr_0 = 1.4972e-04
Loss = 5.1790e-03, PNorm = 173.3633, GNorm = 0.3971, lr_0 = 1.4961e-04
Loss = 9.8774e-04, PNorm = 173.3662, GNorm = 0.0889, lr_0 = 1.4951e-04
Loss = 9.2440e-04, PNorm = 173.3669, GNorm = 0.0639, lr_0 = 1.4941e-04
Loss = 1.1889e-03, PNorm = 173.3699, GNorm = 0.0695, lr_0 = 1.4931e-04
Loss = 1.4990e-03, PNorm = 173.3716, GNorm = 0.1549, lr_0 = 1.4920e-04
Loss = 1.7956e-03, PNorm = 173.3745, GNorm = 0.1370, lr_0 = 1.4910e-04
Loss = 9.0223e-04, PNorm = 173.3766, GNorm = 0.1458, lr_0 = 1.4900e-04
Loss = 1.6473e-03, PNorm = 173.3774, GNorm = 0.1119, lr_0 = 1.4890e-04
Loss = 2.1322e-03, PNorm = 173.3793, GNorm = 0.0865, lr_0 = 1.4880e-04
Loss = 2.7428e-03, PNorm = 173.3812, GNorm = 0.2910, lr_0 = 1.4869e-04
Loss = 2.4706e-03, PNorm = 173.3836, GNorm = 0.1028, lr_0 = 1.4859e-04
Loss = 1.3614e-03, PNorm = 173.3848, GNorm = 0.0810, lr_0 = 1.4849e-04
Loss = 1.6555e-03, PNorm = 173.3870, GNorm = 0.2826, lr_0 = 1.4839e-04
Loss = 9.9451e-04, PNorm = 173.3895, GNorm = 0.0545, lr_0 = 1.4829e-04
Loss = 1.7534e-03, PNorm = 173.3909, GNorm = 0.0819, lr_0 = 1.4818e-04
Loss = 2.0618e-03, PNorm = 173.3925, GNorm = 0.1006, lr_0 = 1.4808e-04
Loss = 1.1197e-03, PNorm = 173.3953, GNorm = 0.2056, lr_0 = 1.4798e-04
Loss = 1.4991e-03, PNorm = 173.3981, GNorm = 0.1179, lr_0 = 1.4788e-04
Loss = 1.5177e-03, PNorm = 173.4014, GNorm = 0.1042, lr_0 = 1.4778e-04
Loss = 1.2568e-03, PNorm = 173.4031, GNorm = 0.1240, lr_0 = 1.4768e-04
Loss = 1.3151e-03, PNorm = 173.4054, GNorm = 0.1822, lr_0 = 1.4758e-04
Loss = 9.9564e-04, PNorm = 173.4061, GNorm = 0.0422, lr_0 = 1.4748e-04
Loss = 2.0207e-03, PNorm = 173.4090, GNorm = 0.1550, lr_0 = 1.4737e-04
Loss = 1.0316e-03, PNorm = 173.4115, GNorm = 0.1081, lr_0 = 1.4727e-04
Loss = 1.2852e-03, PNorm = 173.4137, GNorm = 0.1856, lr_0 = 1.4717e-04
Loss = 1.0055e-03, PNorm = 173.4157, GNorm = 0.0875, lr_0 = 1.4707e-04
Loss = 3.1317e-03, PNorm = 173.4180, GNorm = 0.1532, lr_0 = 1.4697e-04
Loss = 2.8443e-03, PNorm = 173.4219, GNorm = 0.1613, lr_0 = 1.4687e-04
Loss = 2.0785e-03, PNorm = 173.4241, GNorm = 0.1522, lr_0 = 1.4677e-04
Loss = 1.4887e-03, PNorm = 173.4260, GNorm = 0.0861, lr_0 = 1.4667e-04
Loss = 9.3629e-04, PNorm = 173.4267, GNorm = 0.0572, lr_0 = 1.4657e-04
Loss = 1.0354e-03, PNorm = 173.4287, GNorm = 0.0733, lr_0 = 1.4647e-04
Loss = 1.1549e-03, PNorm = 173.4296, GNorm = 0.0851, lr_0 = 1.4637e-04
Loss = 4.6242e-03, PNorm = 173.4317, GNorm = 0.2633, lr_0 = 1.4627e-04
Loss = 1.7961e-03, PNorm = 173.4345, GNorm = 0.0745, lr_0 = 1.4617e-04
Loss = 1.9211e-03, PNorm = 173.4368, GNorm = 0.2204, lr_0 = 1.4607e-04
Loss = 2.1765e-03, PNorm = 173.4393, GNorm = 0.1410, lr_0 = 1.4597e-04
Loss = 7.8511e-04, PNorm = 173.4416, GNorm = 0.0410, lr_0 = 1.4587e-04
Loss = 2.0354e-03, PNorm = 173.4433, GNorm = 0.2000, lr_0 = 1.4577e-04
Loss = 2.8479e-03, PNorm = 173.4441, GNorm = 0.3387, lr_0 = 1.4567e-04
Loss = 2.3687e-03, PNorm = 173.4451, GNorm = 0.0486, lr_0 = 1.4557e-04
Loss = 8.8571e-04, PNorm = 173.4463, GNorm = 0.0398, lr_0 = 1.4547e-04
Loss = 1.4062e-03, PNorm = 173.4488, GNorm = 0.3221, lr_0 = 1.4537e-04
Loss = 3.2134e-03, PNorm = 173.4526, GNorm = 0.2371, lr_0 = 1.4527e-04
Loss = 1.0387e-03, PNorm = 173.4540, GNorm = 0.0710, lr_0 = 1.4517e-04
Loss = 2.1515e-03, PNorm = 173.4574, GNorm = 0.2921, lr_0 = 1.4507e-04
Loss = 2.5943e-03, PNorm = 173.4602, GNorm = 0.0932, lr_0 = 1.4497e-04
Loss = 1.8350e-03, PNorm = 173.4624, GNorm = 0.1048, lr_0 = 1.4487e-04
Loss = 1.2479e-03, PNorm = 173.4656, GNorm = 0.0710, lr_0 = 1.4477e-04
Loss = 1.1897e-03, PNorm = 173.4697, GNorm = 0.0333, lr_0 = 1.4467e-04
Loss = 1.3184e-03, PNorm = 173.4726, GNorm = 0.1524, lr_0 = 1.4457e-04
Loss = 1.5502e-03, PNorm = 173.4756, GNorm = 0.1015, lr_0 = 1.4447e-04
Loss = 1.8981e-03, PNorm = 173.4777, GNorm = 0.1972, lr_0 = 1.4438e-04
Loss = 1.5071e-03, PNorm = 173.4813, GNorm = 0.2487, lr_0 = 1.4428e-04
Loss = 1.1124e-03, PNorm = 173.4830, GNorm = 0.1091, lr_0 = 1.4418e-04
Loss = 3.0491e-03, PNorm = 173.4843, GNorm = 0.0609, lr_0 = 1.4408e-04
Loss = 1.9672e-03, PNorm = 173.4863, GNorm = 0.2203, lr_0 = 1.4398e-04
Loss = 1.7955e-03, PNorm = 173.4876, GNorm = 0.1909, lr_0 = 1.4388e-04
Loss = 8.0577e-04, PNorm = 173.4909, GNorm = 0.1292, lr_0 = 1.4378e-04
Loss = 8.6549e-04, PNorm = 173.4924, GNorm = 0.0953, lr_0 = 1.4368e-04
Loss = 1.5634e-03, PNorm = 173.4942, GNorm = 0.1391, lr_0 = 1.4359e-04
Loss = 9.2118e-04, PNorm = 173.4967, GNorm = 0.0254, lr_0 = 1.4349e-04
Loss = 1.1103e-03, PNorm = 173.4979, GNorm = 0.0538, lr_0 = 1.4339e-04
Loss = 1.4119e-03, PNorm = 173.5010, GNorm = 0.1424, lr_0 = 1.4329e-04
Loss = 1.8553e-03, PNorm = 173.5020, GNorm = 0.0904, lr_0 = 1.4319e-04
Loss = 1.7860e-03, PNorm = 173.5041, GNorm = 0.0912, lr_0 = 1.4310e-04
Loss = 1.0146e-03, PNorm = 173.5068, GNorm = 0.1480, lr_0 = 1.4300e-04
Loss = 1.1486e-03, PNorm = 173.5092, GNorm = 0.0517, lr_0 = 1.4290e-04
Loss = 1.4462e-03, PNorm = 173.5115, GNorm = 0.1855, lr_0 = 1.4280e-04
Loss = 1.6974e-03, PNorm = 173.5136, GNorm = 0.0456, lr_0 = 1.4270e-04
Loss = 1.8275e-03, PNorm = 173.5187, GNorm = 0.2181, lr_0 = 1.4261e-04
Loss = 1.5986e-03, PNorm = 173.5224, GNorm = 0.1284, lr_0 = 1.4251e-04
Loss = 1.2242e-03, PNorm = 173.5248, GNorm = 0.1399, lr_0 = 1.4241e-04
Loss = 1.7578e-03, PNorm = 173.5264, GNorm = 0.0917, lr_0 = 1.4231e-04
Loss = 1.0763e-03, PNorm = 173.5289, GNorm = 0.2773, lr_0 = 1.4222e-04
Loss = 1.9227e-03, PNorm = 173.5311, GNorm = 0.2050, lr_0 = 1.4212e-04
Loss = 1.0948e-03, PNorm = 173.5332, GNorm = 0.0720, lr_0 = 1.4202e-04
Loss = 1.1202e-03, PNorm = 173.5357, GNorm = 0.1279, lr_0 = 1.4192e-04
Loss = 1.8050e-03, PNorm = 173.5376, GNorm = 0.1199, lr_0 = 1.4183e-04
Loss = 2.3505e-03, PNorm = 173.5397, GNorm = 0.1457, lr_0 = 1.4173e-04
Loss = 1.5409e-03, PNorm = 173.5423, GNorm = 0.0860, lr_0 = 1.4163e-04
Loss = 2.3984e-03, PNorm = 173.5443, GNorm = 0.2113, lr_0 = 1.4153e-04
Loss = 9.8091e-04, PNorm = 173.5461, GNorm = 0.0699, lr_0 = 1.4144e-04
Loss = 2.5143e-03, PNorm = 173.5490, GNorm = 0.3084, lr_0 = 1.4134e-04
Loss = 1.7401e-03, PNorm = 173.5525, GNorm = 0.0659, lr_0 = 1.4124e-04
Loss = 1.2256e-03, PNorm = 173.5560, GNorm = 0.0756, lr_0 = 1.4115e-04
Loss = 2.2704e-03, PNorm = 173.5594, GNorm = 0.0586, lr_0 = 1.4105e-04
Loss = 1.5564e-03, PNorm = 173.5613, GNorm = 0.0873, lr_0 = 1.4095e-04
Loss = 1.9084e-03, PNorm = 173.5634, GNorm = 0.0600, lr_0 = 1.4086e-04
Loss = 1.2931e-03, PNorm = 173.5650, GNorm = 0.1205, lr_0 = 1.4076e-04
Loss = 2.2985e-03, PNorm = 173.5679, GNorm = 0.1200, lr_0 = 1.4066e-04
Loss = 2.1525e-03, PNorm = 173.5725, GNorm = 0.1343, lr_0 = 1.4057e-04
Loss = 3.0420e-03, PNorm = 173.5737, GNorm = 0.4723, lr_0 = 1.4047e-04
Loss = 9.7878e-04, PNorm = 173.5754, GNorm = 0.0502, lr_0 = 1.4038e-04
Loss = 2.5050e-03, PNorm = 173.5779, GNorm = 0.0861, lr_0 = 1.4028e-04
Loss = 2.3020e-03, PNorm = 173.5806, GNorm = 0.1644, lr_0 = 1.4018e-04
Loss = 1.1841e-03, PNorm = 173.5837, GNorm = 0.0588, lr_0 = 1.4009e-04
Loss = 1.7189e-03, PNorm = 173.5852, GNorm = 0.1639, lr_0 = 1.3999e-04
Loss = 1.2770e-03, PNorm = 173.5867, GNorm = 0.0806, lr_0 = 1.3990e-04
Loss = 1.2141e-03, PNorm = 173.5890, GNorm = 0.0491, lr_0 = 1.3980e-04
Loss = 1.6089e-03, PNorm = 173.5913, GNorm = 0.1474, lr_0 = 1.3970e-04
Loss = 3.0265e-03, PNorm = 173.5926, GNorm = 0.2070, lr_0 = 1.3961e-04
Loss = 1.1646e-03, PNorm = 173.5950, GNorm = 0.0318, lr_0 = 1.3951e-04
Loss = 2.5677e-03, PNorm = 173.5971, GNorm = 0.1110, lr_0 = 1.3942e-04
Loss = 8.7225e-04, PNorm = 173.5991, GNorm = 0.0746, lr_0 = 1.3932e-04
Loss = 1.2992e-03, PNorm = 173.5996, GNorm = 0.0914, lr_0 = 1.3923e-04
Loss = 3.2493e-03, PNorm = 173.6019, GNorm = 0.2394, lr_0 = 1.3913e-04
Loss = 8.9799e-04, PNorm = 173.6032, GNorm = 0.0772, lr_0 = 1.3904e-04
Loss = 9.5997e-04, PNorm = 173.6055, GNorm = 0.0762, lr_0 = 1.3894e-04
Validation mae = 0.277488
Epoch 26
Loss = 1.3732e-03, PNorm = 173.6077, GNorm = 0.0335, lr_0 = 1.3884e-04
Loss = 1.0313e-03, PNorm = 173.6085, GNorm = 0.1104, lr_0 = 1.3875e-04
Loss = 1.6649e-03, PNorm = 173.6090, GNorm = 0.0864, lr_0 = 1.3865e-04
Loss = 1.6407e-03, PNorm = 173.6103, GNorm = 0.0926, lr_0 = 1.3856e-04
Loss = 1.5519e-03, PNorm = 173.6128, GNorm = 0.1470, lr_0 = 1.3846e-04
Loss = 1.2815e-03, PNorm = 173.6158, GNorm = 0.1374, lr_0 = 1.3837e-04
Loss = 1.8120e-03, PNorm = 173.6174, GNorm = 0.0469, lr_0 = 1.3828e-04
Loss = 2.4991e-03, PNorm = 173.6181, GNorm = 0.0453, lr_0 = 1.3818e-04
Loss = 4.7283e-03, PNorm = 173.6192, GNorm = 0.0627, lr_0 = 1.3809e-04
Loss = 1.2465e-03, PNorm = 173.6212, GNorm = 0.0748, lr_0 = 1.3799e-04
Loss = 1.5077e-03, PNorm = 173.6239, GNorm = 0.0607, lr_0 = 1.3790e-04
Loss = 2.2136e-03, PNorm = 173.6251, GNorm = 0.0496, lr_0 = 1.3780e-04
Loss = 9.1047e-04, PNorm = 173.6264, GNorm = 0.0843, lr_0 = 1.3771e-04
Loss = 1.7532e-03, PNorm = 173.6283, GNorm = 0.1361, lr_0 = 1.3761e-04
Loss = 1.4058e-03, PNorm = 173.6289, GNorm = 0.0532, lr_0 = 1.3752e-04
Loss = 1.3139e-03, PNorm = 173.6302, GNorm = 0.0928, lr_0 = 1.3742e-04
Loss = 1.3369e-03, PNorm = 173.6325, GNorm = 0.1370, lr_0 = 1.3733e-04
Loss = 1.2078e-03, PNorm = 173.6361, GNorm = 0.1818, lr_0 = 1.3724e-04
Loss = 2.0156e-03, PNorm = 173.6390, GNorm = 0.0724, lr_0 = 1.3714e-04
Loss = 1.7366e-03, PNorm = 173.6417, GNorm = 0.0393, lr_0 = 1.3705e-04
Loss = 9.6259e-04, PNorm = 173.6451, GNorm = 0.0578, lr_0 = 1.3695e-04
Loss = 1.0675e-03, PNorm = 173.6476, GNorm = 0.2060, lr_0 = 1.3686e-04
Loss = 1.1785e-03, PNorm = 173.6487, GNorm = 0.0448, lr_0 = 1.3677e-04
Loss = 7.3182e-04, PNorm = 173.6499, GNorm = 0.1436, lr_0 = 1.3667e-04
Loss = 9.7323e-04, PNorm = 173.6499, GNorm = 0.0614, lr_0 = 1.3658e-04
Loss = 1.1753e-03, PNorm = 173.6519, GNorm = 0.0611, lr_0 = 1.3649e-04
Loss = 1.2026e-03, PNorm = 173.6522, GNorm = 0.0403, lr_0 = 1.3639e-04
Loss = 9.5919e-04, PNorm = 173.6526, GNorm = 0.0339, lr_0 = 1.3630e-04
Loss = 8.8822e-04, PNorm = 173.6531, GNorm = 0.1006, lr_0 = 1.3621e-04
Loss = 9.2793e-04, PNorm = 173.6551, GNorm = 0.0856, lr_0 = 1.3611e-04
Loss = 3.5937e-03, PNorm = 173.6561, GNorm = 0.1388, lr_0 = 1.3602e-04
Loss = 1.4060e-03, PNorm = 173.6580, GNorm = 0.0910, lr_0 = 1.3593e-04
Loss = 8.8373e-04, PNorm = 173.6594, GNorm = 0.1942, lr_0 = 1.3583e-04
Loss = 1.0909e-03, PNorm = 173.6614, GNorm = 0.1299, lr_0 = 1.3574e-04
Loss = 9.4708e-04, PNorm = 173.6627, GNorm = 0.0433, lr_0 = 1.3565e-04
Loss = 1.4002e-03, PNorm = 173.6666, GNorm = 0.0793, lr_0 = 1.3555e-04
Loss = 7.6497e-04, PNorm = 173.6690, GNorm = 0.0867, lr_0 = 1.3546e-04
Loss = 8.1639e-04, PNorm = 173.6708, GNorm = 0.0733, lr_0 = 1.3537e-04
Loss = 1.6525e-03, PNorm = 173.6714, GNorm = 0.0524, lr_0 = 1.3528e-04
Loss = 7.9632e-04, PNorm = 173.6727, GNorm = 0.0338, lr_0 = 1.3518e-04
Loss = 2.4278e-03, PNorm = 173.6741, GNorm = 0.0544, lr_0 = 1.3509e-04
Loss = 1.0056e-03, PNorm = 173.6761, GNorm = 0.0990, lr_0 = 1.3500e-04
Loss = 1.3186e-03, PNorm = 173.6796, GNorm = 0.1527, lr_0 = 1.3491e-04
Loss = 1.9582e-03, PNorm = 173.6832, GNorm = 0.0575, lr_0 = 1.3481e-04
Loss = 2.0358e-03, PNorm = 173.6857, GNorm = 0.0929, lr_0 = 1.3472e-04
Loss = 2.0291e-03, PNorm = 173.6879, GNorm = 0.1821, lr_0 = 1.3463e-04
Loss = 1.2508e-03, PNorm = 173.6903, GNorm = 0.1230, lr_0 = 1.3454e-04
Loss = 9.8237e-04, PNorm = 173.6917, GNorm = 0.1685, lr_0 = 1.3444e-04
Loss = 1.3401e-03, PNorm = 173.6914, GNorm = 0.0779, lr_0 = 1.3435e-04
Loss = 9.2568e-04, PNorm = 173.6925, GNorm = 0.1071, lr_0 = 1.3426e-04
Loss = 2.0049e-03, PNorm = 173.6939, GNorm = 0.0709, lr_0 = 1.3417e-04
Loss = 1.0344e-03, PNorm = 173.6972, GNorm = 0.1329, lr_0 = 1.3408e-04
Loss = 2.6763e-03, PNorm = 173.7004, GNorm = 0.0981, lr_0 = 1.3398e-04
Loss = 1.7329e-03, PNorm = 173.7039, GNorm = 0.5146, lr_0 = 1.3389e-04
Loss = 2.4765e-03, PNorm = 173.7065, GNorm = 0.1306, lr_0 = 1.3380e-04
Loss = 1.1251e-03, PNorm = 173.7091, GNorm = 0.0834, lr_0 = 1.3371e-04
Loss = 1.1518e-03, PNorm = 173.7100, GNorm = 0.0546, lr_0 = 1.3362e-04
Loss = 1.7188e-03, PNorm = 173.7110, GNorm = 0.0806, lr_0 = 1.3353e-04
Loss = 1.5362e-03, PNorm = 173.7129, GNorm = 0.0717, lr_0 = 1.3343e-04
Loss = 2.6101e-03, PNorm = 173.7147, GNorm = 0.0427, lr_0 = 1.3334e-04
Loss = 3.3116e-03, PNorm = 173.7172, GNorm = 0.1368, lr_0 = 1.3325e-04
Loss = 3.7635e-03, PNorm = 173.7209, GNorm = 0.1116, lr_0 = 1.3316e-04
Loss = 1.2229e-03, PNorm = 173.7232, GNorm = 0.0418, lr_0 = 1.3307e-04
Loss = 1.0644e-03, PNorm = 173.7240, GNorm = 0.1220, lr_0 = 1.3298e-04
Loss = 1.0907e-03, PNorm = 173.7251, GNorm = 0.0521, lr_0 = 1.3289e-04
Loss = 4.3212e-03, PNorm = 173.7245, GNorm = 0.1179, lr_0 = 1.3280e-04
Loss = 1.5758e-03, PNorm = 173.7235, GNorm = 0.3637, lr_0 = 1.3270e-04
Loss = 1.5669e-03, PNorm = 173.7258, GNorm = 0.0488, lr_0 = 1.3261e-04
Loss = 1.5355e-03, PNorm = 173.7283, GNorm = 0.0789, lr_0 = 1.3252e-04
Loss = 7.2970e-04, PNorm = 173.7306, GNorm = 0.1423, lr_0 = 1.3243e-04
Loss = 2.3704e-03, PNorm = 173.7337, GNorm = 0.1736, lr_0 = 1.3234e-04
Loss = 1.2578e-03, PNorm = 173.7349, GNorm = 0.1554, lr_0 = 1.3225e-04
Loss = 2.7245e-03, PNorm = 173.7363, GNorm = 0.1327, lr_0 = 1.3216e-04
Loss = 8.6609e-04, PNorm = 173.7391, GNorm = 0.0641, lr_0 = 1.3207e-04
Loss = 1.8068e-03, PNorm = 173.7406, GNorm = 0.0925, lr_0 = 1.3198e-04
Loss = 1.7352e-03, PNorm = 173.7422, GNorm = 0.2356, lr_0 = 1.3189e-04
Loss = 2.6919e-03, PNorm = 173.7441, GNorm = 0.1327, lr_0 = 1.3180e-04
Loss = 7.6286e-04, PNorm = 173.7465, GNorm = 0.0752, lr_0 = 1.3171e-04
Loss = 1.1538e-03, PNorm = 173.7498, GNorm = 0.0260, lr_0 = 1.3162e-04
Loss = 1.8854e-03, PNorm = 173.7508, GNorm = 0.0970, lr_0 = 1.3153e-04
Loss = 2.1299e-03, PNorm = 173.7527, GNorm = 0.1071, lr_0 = 1.3144e-04
Loss = 1.6303e-03, PNorm = 173.7538, GNorm = 0.1649, lr_0 = 1.3135e-04
Loss = 1.3312e-03, PNorm = 173.7560, GNorm = 0.0685, lr_0 = 1.3126e-04
Loss = 2.7177e-03, PNorm = 173.7576, GNorm = 0.0946, lr_0 = 1.3117e-04
Loss = 9.4273e-04, PNorm = 173.7600, GNorm = 0.0421, lr_0 = 1.3108e-04
Loss = 1.0585e-03, PNorm = 173.7623, GNorm = 0.1106, lr_0 = 1.3099e-04
Loss = 8.3762e-04, PNorm = 173.7649, GNorm = 0.0616, lr_0 = 1.3090e-04
Loss = 1.0030e-03, PNorm = 173.7676, GNorm = 0.1795, lr_0 = 1.3081e-04
Loss = 1.6294e-03, PNorm = 173.7692, GNorm = 0.1804, lr_0 = 1.3072e-04
Loss = 1.6911e-03, PNorm = 173.7695, GNorm = 0.1023, lr_0 = 1.3063e-04
Loss = 1.2966e-03, PNorm = 173.7701, GNorm = 0.0743, lr_0 = 1.3054e-04
Loss = 1.0995e-03, PNorm = 173.7709, GNorm = 0.0454, lr_0 = 1.3045e-04
Loss = 2.3974e-03, PNorm = 173.7739, GNorm = 0.1182, lr_0 = 1.3036e-04
Loss = 1.5830e-03, PNorm = 173.7762, GNorm = 0.0369, lr_0 = 1.3027e-04
Loss = 1.0460e-03, PNorm = 173.7773, GNorm = 0.0751, lr_0 = 1.3018e-04
Loss = 8.9610e-04, PNorm = 173.7786, GNorm = 0.0973, lr_0 = 1.3009e-04
Loss = 2.4573e-03, PNorm = 173.7812, GNorm = 0.3094, lr_0 = 1.3000e-04
Loss = 9.6174e-04, PNorm = 173.7826, GNorm = 0.0856, lr_0 = 1.2992e-04
Loss = 1.4093e-03, PNorm = 173.7842, GNorm = 0.0333, lr_0 = 1.2983e-04
Loss = 9.0984e-04, PNorm = 173.7862, GNorm = 0.0868, lr_0 = 1.2974e-04
Loss = 1.7395e-03, PNorm = 173.7881, GNorm = 0.1023, lr_0 = 1.2965e-04
Loss = 1.2984e-03, PNorm = 173.7909, GNorm = 0.1157, lr_0 = 1.2956e-04
Loss = 1.6586e-03, PNorm = 173.7929, GNorm = 0.1075, lr_0 = 1.2947e-04
Loss = 1.2655e-03, PNorm = 173.7928, GNorm = 0.0871, lr_0 = 1.2938e-04
Loss = 1.1494e-03, PNorm = 173.7939, GNorm = 0.0545, lr_0 = 1.2929e-04
Loss = 2.1413e-03, PNorm = 173.7969, GNorm = 0.1119, lr_0 = 1.2921e-04
Loss = 2.1873e-03, PNorm = 173.7986, GNorm = 0.0512, lr_0 = 1.2912e-04
Loss = 8.2345e-04, PNorm = 173.8007, GNorm = 0.1729, lr_0 = 1.2903e-04
Loss = 1.3883e-03, PNorm = 173.8018, GNorm = 0.0788, lr_0 = 1.2894e-04
Loss = 2.0215e-03, PNorm = 173.8044, GNorm = 0.0953, lr_0 = 1.2885e-04
Loss = 1.2968e-03, PNorm = 173.8074, GNorm = 0.1587, lr_0 = 1.2876e-04
Loss = 1.6022e-03, PNorm = 173.8091, GNorm = 0.1402, lr_0 = 1.2867e-04
Loss = 1.0084e-03, PNorm = 173.8100, GNorm = 0.0681, lr_0 = 1.2859e-04
Loss = 8.1902e-04, PNorm = 173.8121, GNorm = 0.0691, lr_0 = 1.2850e-04
Loss = 1.7973e-03, PNorm = 173.8138, GNorm = 0.1420, lr_0 = 1.2841e-04
Loss = 2.7971e-03, PNorm = 173.8159, GNorm = 0.2169, lr_0 = 1.2832e-04
Loss = 7.6367e-04, PNorm = 173.8174, GNorm = 0.1474, lr_0 = 1.2823e-04
Loss = 1.9414e-03, PNorm = 173.8184, GNorm = 0.0438, lr_0 = 1.2815e-04
Loss = 1.2318e-03, PNorm = 173.8188, GNorm = 0.1450, lr_0 = 1.2806e-04
Loss = 1.0757e-03, PNorm = 173.8207, GNorm = 0.1508, lr_0 = 1.2797e-04
Validation mae = 0.277609
Epoch 27
Loss = 1.8446e-03, PNorm = 173.8214, GNorm = 0.1961, lr_0 = 1.2788e-04
Loss = 1.3818e-03, PNorm = 173.8240, GNorm = 0.1714, lr_0 = 1.2780e-04
Loss = 9.7842e-04, PNorm = 173.8263, GNorm = 0.0993, lr_0 = 1.2771e-04
Loss = 1.2655e-03, PNorm = 173.8281, GNorm = 0.2180, lr_0 = 1.2762e-04
Loss = 1.4924e-03, PNorm = 173.8281, GNorm = 0.0768, lr_0 = 1.2753e-04
Loss = 3.7635e-03, PNorm = 173.8305, GNorm = 0.0770, lr_0 = 1.2745e-04
Loss = 1.3995e-03, PNorm = 173.8319, GNorm = 0.1896, lr_0 = 1.2736e-04
Loss = 7.0879e-04, PNorm = 173.8335, GNorm = 0.0770, lr_0 = 1.2727e-04
Loss = 7.0676e-04, PNorm = 173.8350, GNorm = 0.0455, lr_0 = 1.2718e-04
Loss = 7.3305e-04, PNorm = 173.8369, GNorm = 0.0697, lr_0 = 1.2710e-04
Loss = 1.1616e-03, PNorm = 173.8378, GNorm = 0.1373, lr_0 = 1.2701e-04
Loss = 7.4083e-04, PNorm = 173.8391, GNorm = 0.0776, lr_0 = 1.2692e-04
Loss = 8.1198e-04, PNorm = 173.8414, GNorm = 0.0809, lr_0 = 1.2684e-04
Loss = 9.6134e-04, PNorm = 173.8442, GNorm = 0.0963, lr_0 = 1.2675e-04
Loss = 7.0415e-04, PNorm = 173.8455, GNorm = 0.1205, lr_0 = 1.2666e-04
Loss = 1.0334e-03, PNorm = 173.8475, GNorm = 0.0589, lr_0 = 1.2658e-04
Loss = 1.2719e-03, PNorm = 173.8469, GNorm = 0.1053, lr_0 = 1.2649e-04
Loss = 8.9687e-04, PNorm = 173.8482, GNorm = 0.1108, lr_0 = 1.2640e-04
Loss = 1.1181e-03, PNorm = 173.8486, GNorm = 0.0489, lr_0 = 1.2632e-04
Loss = 1.0897e-03, PNorm = 173.8497, GNorm = 0.1324, lr_0 = 1.2623e-04
Loss = 6.7848e-04, PNorm = 173.8507, GNorm = 0.0601, lr_0 = 1.2614e-04
Loss = 2.4490e-03, PNorm = 173.8523, GNorm = 0.0784, lr_0 = 1.2606e-04
Loss = 2.9425e-03, PNorm = 173.8525, GNorm = 0.1413, lr_0 = 1.2597e-04
Loss = 1.2407e-03, PNorm = 173.8537, GNorm = 0.1993, lr_0 = 1.2588e-04
Loss = 1.1606e-03, PNorm = 173.8555, GNorm = 0.0373, lr_0 = 1.2580e-04
Loss = 2.6422e-03, PNorm = 173.8574, GNorm = 0.0577, lr_0 = 1.2571e-04
Loss = 7.6226e-04, PNorm = 173.8582, GNorm = 0.0945, lr_0 = 1.2563e-04
Loss = 1.2078e-03, PNorm = 173.8593, GNorm = 0.0491, lr_0 = 1.2554e-04
Loss = 1.8991e-03, PNorm = 173.8605, GNorm = 0.0404, lr_0 = 1.2545e-04
Loss = 9.8679e-04, PNorm = 173.8624, GNorm = 0.0533, lr_0 = 1.2537e-04
Loss = 7.8801e-04, PNorm = 173.8641, GNorm = 0.1123, lr_0 = 1.2528e-04
Loss = 7.9060e-04, PNorm = 173.8657, GNorm = 0.1210, lr_0 = 1.2520e-04
Loss = 8.2950e-04, PNorm = 173.8663, GNorm = 0.1194, lr_0 = 1.2511e-04
Loss = 8.7175e-04, PNorm = 173.8669, GNorm = 0.0472, lr_0 = 1.2502e-04
Loss = 1.6762e-03, PNorm = 173.8681, GNorm = 0.1673, lr_0 = 1.2494e-04
Loss = 2.0576e-03, PNorm = 173.8691, GNorm = 0.1645, lr_0 = 1.2485e-04
Loss = 1.1181e-03, PNorm = 173.8699, GNorm = 0.0607, lr_0 = 1.2477e-04
Loss = 1.4882e-03, PNorm = 173.8717, GNorm = 0.0590, lr_0 = 1.2468e-04
Loss = 7.2017e-04, PNorm = 173.8738, GNorm = 0.0620, lr_0 = 1.2460e-04
Loss = 1.2460e-03, PNorm = 173.8755, GNorm = 0.1107, lr_0 = 1.2451e-04
Loss = 1.5716e-03, PNorm = 173.8763, GNorm = 0.0547, lr_0 = 1.2443e-04
Loss = 2.5054e-03, PNorm = 173.8772, GNorm = 0.0733, lr_0 = 1.2434e-04
Loss = 1.5881e-03, PNorm = 173.8772, GNorm = 0.0958, lr_0 = 1.2426e-04
Loss = 1.0468e-03, PNorm = 173.8791, GNorm = 0.0658, lr_0 = 1.2417e-04
Loss = 1.0166e-03, PNorm = 173.8823, GNorm = 0.0496, lr_0 = 1.2409e-04
Loss = 1.0445e-03, PNorm = 173.8845, GNorm = 0.0646, lr_0 = 1.2400e-04
Loss = 1.9312e-03, PNorm = 173.8866, GNorm = 0.0312, lr_0 = 1.2392e-04
Loss = 1.8487e-03, PNorm = 173.8873, GNorm = 0.0920, lr_0 = 1.2383e-04
Loss = 1.1856e-03, PNorm = 173.8890, GNorm = 0.1470, lr_0 = 1.2375e-04
Loss = 6.9331e-04, PNorm = 173.8909, GNorm = 0.0779, lr_0 = 1.2366e-04
Loss = 1.8407e-03, PNorm = 173.8934, GNorm = 0.0901, lr_0 = 1.2358e-04
Loss = 2.7115e-03, PNorm = 173.8951, GNorm = 0.1464, lr_0 = 1.2349e-04
Loss = 1.7027e-03, PNorm = 173.8967, GNorm = 0.1763, lr_0 = 1.2341e-04
Loss = 1.7133e-03, PNorm = 173.8991, GNorm = 0.1923, lr_0 = 1.2332e-04
Loss = 2.4128e-03, PNorm = 173.8999, GNorm = 0.1448, lr_0 = 1.2324e-04
Loss = 1.3044e-03, PNorm = 173.9033, GNorm = 0.1412, lr_0 = 1.2315e-04
Loss = 8.6116e-04, PNorm = 173.9047, GNorm = 0.0556, lr_0 = 1.2307e-04
Loss = 1.6697e-03, PNorm = 173.9079, GNorm = 0.1495, lr_0 = 1.2298e-04
Loss = 2.0076e-03, PNorm = 173.9100, GNorm = 0.0948, lr_0 = 1.2290e-04
Loss = 2.7154e-03, PNorm = 173.9131, GNorm = 0.2046, lr_0 = 1.2282e-04
Loss = 7.8923e-04, PNorm = 173.9141, GNorm = 0.1458, lr_0 = 1.2273e-04
Loss = 1.5339e-03, PNorm = 173.9154, GNorm = 0.0551, lr_0 = 1.2265e-04
Loss = 1.3833e-03, PNorm = 173.9150, GNorm = 0.1142, lr_0 = 1.2256e-04
Loss = 1.7508e-03, PNorm = 173.9145, GNorm = 0.0301, lr_0 = 1.2248e-04
Loss = 8.4409e-04, PNorm = 173.9158, GNorm = 0.0685, lr_0 = 1.2240e-04
Loss = 2.7135e-03, PNorm = 173.9167, GNorm = 0.1007, lr_0 = 1.2231e-04
Loss = 1.0383e-03, PNorm = 173.9200, GNorm = 0.0456, lr_0 = 1.2223e-04
Loss = 1.3021e-03, PNorm = 173.9238, GNorm = 0.1092, lr_0 = 1.2214e-04
Loss = 7.6805e-04, PNorm = 173.9261, GNorm = 0.0776, lr_0 = 1.2206e-04
Loss = 8.5029e-04, PNorm = 173.9257, GNorm = 0.0456, lr_0 = 1.2198e-04
Loss = 9.2044e-04, PNorm = 173.9263, GNorm = 0.1874, lr_0 = 1.2189e-04
Loss = 9.3490e-04, PNorm = 173.9287, GNorm = 0.0976, lr_0 = 1.2181e-04
Loss = 7.4528e-04, PNorm = 173.9316, GNorm = 0.0857, lr_0 = 1.2173e-04
Loss = 9.1727e-04, PNorm = 173.9352, GNorm = 0.2329, lr_0 = 1.2164e-04
Loss = 1.8906e-03, PNorm = 173.9365, GNorm = 0.0767, lr_0 = 1.2156e-04
Loss = 8.2547e-04, PNorm = 173.9375, GNorm = 0.0627, lr_0 = 1.2148e-04
Loss = 9.8529e-04, PNorm = 173.9402, GNorm = 0.0612, lr_0 = 1.2139e-04
Loss = 1.0466e-03, PNorm = 173.9429, GNorm = 0.0708, lr_0 = 1.2131e-04
Loss = 1.2991e-03, PNorm = 173.9447, GNorm = 0.1012, lr_0 = 1.2123e-04
Loss = 1.0094e-03, PNorm = 173.9450, GNorm = 0.1402, lr_0 = 1.2114e-04
Loss = 1.6298e-03, PNorm = 173.9456, GNorm = 0.0323, lr_0 = 1.2106e-04
Loss = 1.2394e-03, PNorm = 173.9462, GNorm = 0.0472, lr_0 = 1.2098e-04
Loss = 6.7794e-04, PNorm = 173.9484, GNorm = 0.0748, lr_0 = 1.2090e-04
Loss = 1.0498e-03, PNorm = 173.9509, GNorm = 0.0548, lr_0 = 1.2081e-04
Loss = 9.5409e-04, PNorm = 173.9541, GNorm = 0.0597, lr_0 = 1.2073e-04
Loss = 3.2944e-03, PNorm = 173.9565, GNorm = 0.1554, lr_0 = 1.2065e-04
Loss = 1.3827e-03, PNorm = 173.9589, GNorm = 0.1260, lr_0 = 1.2056e-04
Loss = 2.0681e-03, PNorm = 173.9604, GNorm = 0.0892, lr_0 = 1.2048e-04
Loss = 1.1372e-03, PNorm = 173.9609, GNorm = 0.1116, lr_0 = 1.2040e-04
Loss = 1.8759e-03, PNorm = 173.9630, GNorm = 0.1416, lr_0 = 1.2032e-04
Loss = 1.0252e-03, PNorm = 173.9659, GNorm = 0.0400, lr_0 = 1.2023e-04
Loss = 1.2330e-03, PNorm = 173.9675, GNorm = 0.1142, lr_0 = 1.2015e-04
Loss = 6.6838e-04, PNorm = 173.9682, GNorm = 0.0563, lr_0 = 1.2007e-04
Loss = 1.1684e-03, PNorm = 173.9701, GNorm = 0.0763, lr_0 = 1.1999e-04
Loss = 1.4209e-03, PNorm = 173.9727, GNorm = 0.1266, lr_0 = 1.1991e-04
Loss = 6.6724e-04, PNorm = 173.9747, GNorm = 0.0411, lr_0 = 1.1982e-04
Loss = 1.7504e-03, PNorm = 173.9757, GNorm = 0.0933, lr_0 = 1.1974e-04
Loss = 2.1520e-03, PNorm = 173.9775, GNorm = 0.1272, lr_0 = 1.1966e-04
Loss = 2.2462e-03, PNorm = 173.9785, GNorm = 0.1715, lr_0 = 1.1958e-04
Loss = 1.5962e-03, PNorm = 173.9796, GNorm = 0.0451, lr_0 = 1.1950e-04
Loss = 1.4686e-03, PNorm = 173.9808, GNorm = 0.0688, lr_0 = 1.1941e-04
Loss = 1.4440e-03, PNorm = 173.9820, GNorm = 0.0995, lr_0 = 1.1933e-04
Loss = 2.3760e-03, PNorm = 173.9827, GNorm = 0.0794, lr_0 = 1.1925e-04
Loss = 8.6760e-04, PNorm = 173.9840, GNorm = 0.0426, lr_0 = 1.1917e-04
Loss = 3.5871e-03, PNorm = 173.9865, GNorm = 0.1274, lr_0 = 1.1909e-04
Loss = 1.3293e-03, PNorm = 173.9896, GNorm = 0.2357, lr_0 = 1.1901e-04
Loss = 1.0419e-03, PNorm = 173.9914, GNorm = 0.1412, lr_0 = 1.1892e-04
Loss = 1.1878e-03, PNorm = 173.9923, GNorm = 0.0577, lr_0 = 1.1884e-04
Loss = 1.7618e-03, PNorm = 173.9949, GNorm = 0.0529, lr_0 = 1.1876e-04
Loss = 6.6831e-04, PNorm = 173.9964, GNorm = 0.1003, lr_0 = 1.1868e-04
Loss = 1.7770e-03, PNorm = 173.9966, GNorm = 0.0521, lr_0 = 1.1860e-04
Loss = 2.5654e-03, PNorm = 173.9976, GNorm = 0.0403, lr_0 = 1.1852e-04
Loss = 1.5968e-03, PNorm = 173.9988, GNorm = 0.0851, lr_0 = 1.1844e-04
Loss = 9.6031e-04, PNorm = 173.9995, GNorm = 0.0798, lr_0 = 1.1835e-04
Loss = 1.3079e-03, PNorm = 174.0020, GNorm = 0.0662, lr_0 = 1.1827e-04
Loss = 8.6394e-04, PNorm = 174.0034, GNorm = 0.1002, lr_0 = 1.1819e-04
Loss = 2.2560e-03, PNorm = 174.0067, GNorm = 0.0785, lr_0 = 1.1811e-04
Loss = 2.5529e-03, PNorm = 174.0092, GNorm = 0.0925, lr_0 = 1.1803e-04
Loss = 1.5615e-03, PNorm = 174.0120, GNorm = 0.2183, lr_0 = 1.1795e-04
Loss = 2.6614e-03, PNorm = 174.0125, GNorm = 0.0323, lr_0 = 1.1787e-04
Validation mae = 0.277632
Epoch 28
Loss = 6.8811e-04, PNorm = 174.0141, GNorm = 0.2338, lr_0 = 1.1779e-04
Loss = 1.3185e-03, PNorm = 174.0144, GNorm = 0.0664, lr_0 = 1.1771e-04
Loss = 7.3604e-04, PNorm = 174.0149, GNorm = 0.0703, lr_0 = 1.1763e-04
Loss = 9.4318e-04, PNorm = 174.0153, GNorm = 0.0277, lr_0 = 1.1755e-04
Loss = 1.0636e-03, PNorm = 174.0163, GNorm = 0.0366, lr_0 = 1.1747e-04
Loss = 8.2212e-04, PNorm = 174.0182, GNorm = 0.0618, lr_0 = 1.1739e-04
Loss = 9.8933e-04, PNorm = 174.0191, GNorm = 0.1290, lr_0 = 1.1730e-04
Loss = 8.2197e-04, PNorm = 174.0195, GNorm = 0.0675, lr_0 = 1.1722e-04
Loss = 1.0893e-03, PNorm = 174.0195, GNorm = 0.0772, lr_0 = 1.1714e-04
Loss = 1.5525e-03, PNorm = 174.0192, GNorm = 0.0753, lr_0 = 1.1706e-04
Loss = 1.3475e-03, PNorm = 174.0209, GNorm = 0.1223, lr_0 = 1.1698e-04
Loss = 5.9520e-04, PNorm = 174.0227, GNorm = 0.0210, lr_0 = 1.1690e-04
Loss = 1.0819e-03, PNorm = 174.0245, GNorm = 0.1300, lr_0 = 1.1682e-04
Loss = 1.4251e-03, PNorm = 174.0262, GNorm = 0.0717, lr_0 = 1.1674e-04
Loss = 5.8649e-04, PNorm = 174.0276, GNorm = 0.0774, lr_0 = 1.1666e-04
Loss = 2.5540e-03, PNorm = 174.0286, GNorm = 0.1224, lr_0 = 1.1658e-04
Loss = 1.0303e-03, PNorm = 174.0288, GNorm = 0.1233, lr_0 = 1.1650e-04
Loss = 9.3037e-04, PNorm = 174.0306, GNorm = 0.1212, lr_0 = 1.1642e-04
Loss = 1.6220e-03, PNorm = 174.0325, GNorm = 0.0641, lr_0 = 1.1634e-04
Loss = 1.1596e-03, PNorm = 174.0344, GNorm = 0.0449, lr_0 = 1.1626e-04
Loss = 8.1654e-04, PNorm = 174.0360, GNorm = 0.1075, lr_0 = 1.1618e-04
Loss = 1.3138e-03, PNorm = 174.0358, GNorm = 0.1048, lr_0 = 1.1611e-04
Loss = 1.2197e-03, PNorm = 174.0381, GNorm = 0.0890, lr_0 = 1.1603e-04
Loss = 1.1190e-03, PNorm = 174.0398, GNorm = 0.1579, lr_0 = 1.1595e-04
Loss = 9.1746e-04, PNorm = 174.0412, GNorm = 0.0583, lr_0 = 1.1587e-04
Loss = 1.5047e-03, PNorm = 174.0419, GNorm = 0.0867, lr_0 = 1.1579e-04
Loss = 7.4941e-04, PNorm = 174.0422, GNorm = 0.0729, lr_0 = 1.1571e-04
Loss = 7.9319e-04, PNorm = 174.0446, GNorm = 0.0880, lr_0 = 1.1563e-04
Loss = 1.9919e-03, PNorm = 174.0441, GNorm = 0.2537, lr_0 = 1.1555e-04
Loss = 1.5601e-03, PNorm = 174.0460, GNorm = 0.1064, lr_0 = 1.1547e-04
Loss = 1.0221e-03, PNorm = 174.0466, GNorm = 0.0469, lr_0 = 1.1539e-04
Loss = 6.2134e-04, PNorm = 174.0485, GNorm = 0.0450, lr_0 = 1.1531e-04
Loss = 1.7298e-03, PNorm = 174.0503, GNorm = 0.0544, lr_0 = 1.1523e-04
Loss = 7.9286e-04, PNorm = 174.0512, GNorm = 0.0450, lr_0 = 1.1515e-04
Loss = 1.5763e-03, PNorm = 174.0511, GNorm = 0.0500, lr_0 = 1.1508e-04
Loss = 1.2414e-03, PNorm = 174.0511, GNorm = 0.0589, lr_0 = 1.1500e-04
Loss = 2.5859e-03, PNorm = 174.0530, GNorm = 0.0716, lr_0 = 1.1492e-04
Loss = 1.4624e-03, PNorm = 174.0539, GNorm = 0.0743, lr_0 = 1.1484e-04
Loss = 1.3889e-03, PNorm = 174.0532, GNorm = 0.1855, lr_0 = 1.1476e-04
Loss = 9.5435e-04, PNorm = 174.0541, GNorm = 0.0513, lr_0 = 1.1468e-04
Loss = 1.0547e-03, PNorm = 174.0558, GNorm = 0.1132, lr_0 = 1.1460e-04
Loss = 1.7177e-03, PNorm = 174.0587, GNorm = 0.0798, lr_0 = 1.1452e-04
Loss = 6.5365e-04, PNorm = 174.0601, GNorm = 0.0974, lr_0 = 1.1445e-04
Loss = 5.3566e-04, PNorm = 174.0611, GNorm = 0.0521, lr_0 = 1.1437e-04
Loss = 6.8174e-04, PNorm = 174.0618, GNorm = 0.0959, lr_0 = 1.1429e-04
Loss = 1.4864e-03, PNorm = 174.0619, GNorm = 0.0612, lr_0 = 1.1421e-04
Loss = 1.8358e-03, PNorm = 174.0633, GNorm = 0.1527, lr_0 = 1.1413e-04
Loss = 5.4481e-04, PNorm = 174.0656, GNorm = 0.0932, lr_0 = 1.1405e-04
Loss = 1.0980e-03, PNorm = 174.0670, GNorm = 0.0962, lr_0 = 1.1398e-04
Loss = 7.0256e-04, PNorm = 174.0679, GNorm = 0.0283, lr_0 = 1.1390e-04
Loss = 2.3702e-03, PNorm = 174.0683, GNorm = 0.2130, lr_0 = 1.1382e-04
Loss = 1.5172e-03, PNorm = 174.0699, GNorm = 0.1105, lr_0 = 1.1374e-04
Loss = 2.2779e-03, PNorm = 174.0714, GNorm = 0.0327, lr_0 = 1.1366e-04
Loss = 2.0818e-03, PNorm = 174.0716, GNorm = 0.2242, lr_0 = 1.1359e-04
Loss = 8.6989e-04, PNorm = 174.0733, GNorm = 0.0842, lr_0 = 1.1351e-04
Loss = 9.8234e-04, PNorm = 174.0749, GNorm = 0.1568, lr_0 = 1.1343e-04
Loss = 8.7016e-04, PNorm = 174.0761, GNorm = 0.0781, lr_0 = 1.1335e-04
Loss = 1.5691e-03, PNorm = 174.0772, GNorm = 0.0501, lr_0 = 1.1328e-04
Loss = 1.0578e-03, PNorm = 174.0775, GNorm = 0.0667, lr_0 = 1.1320e-04
Loss = 8.2169e-04, PNorm = 174.0781, GNorm = 0.1341, lr_0 = 1.1312e-04
Loss = 6.8513e-04, PNorm = 174.0800, GNorm = 0.0835, lr_0 = 1.1304e-04
Loss = 1.4422e-03, PNorm = 174.0813, GNorm = 0.0678, lr_0 = 1.1297e-04
Loss = 1.9894e-03, PNorm = 174.0833, GNorm = 0.1122, lr_0 = 1.1289e-04
Loss = 8.3332e-04, PNorm = 174.0839, GNorm = 0.0375, lr_0 = 1.1281e-04
Loss = 1.2077e-03, PNorm = 174.0856, GNorm = 0.0303, lr_0 = 1.1273e-04
Loss = 1.2472e-03, PNorm = 174.0864, GNorm = 0.3322, lr_0 = 1.1266e-04
Loss = 6.9881e-04, PNorm = 174.0874, GNorm = 0.0703, lr_0 = 1.1258e-04
Loss = 1.3506e-03, PNorm = 174.0898, GNorm = 0.0948, lr_0 = 1.1250e-04
Loss = 1.4638e-03, PNorm = 174.0913, GNorm = 0.1527, lr_0 = 1.1243e-04
Loss = 1.2136e-03, PNorm = 174.0930, GNorm = 0.0636, lr_0 = 1.1235e-04
Loss = 2.4552e-03, PNorm = 174.0951, GNorm = 0.1329, lr_0 = 1.1227e-04
Loss = 6.6167e-04, PNorm = 174.0960, GNorm = 0.0896, lr_0 = 1.1219e-04
Loss = 1.7314e-03, PNorm = 174.0969, GNorm = 0.1283, lr_0 = 1.1212e-04
Loss = 1.6976e-03, PNorm = 174.0968, GNorm = 0.2879, lr_0 = 1.1204e-04
Loss = 1.8916e-03, PNorm = 174.0975, GNorm = 0.1042, lr_0 = 1.1196e-04
Loss = 1.8659e-03, PNorm = 174.0985, GNorm = 0.0610, lr_0 = 1.1189e-04
Loss = 5.9065e-04, PNorm = 174.1003, GNorm = 0.0816, lr_0 = 1.1181e-04
Loss = 5.6791e-04, PNorm = 174.1021, GNorm = 0.1227, lr_0 = 1.1173e-04
Loss = 7.9044e-04, PNorm = 174.1036, GNorm = 0.0629, lr_0 = 1.1166e-04
Loss = 2.7841e-03, PNorm = 174.1045, GNorm = 0.1576, lr_0 = 1.1158e-04
Loss = 1.0576e-03, PNorm = 174.1054, GNorm = 0.0523, lr_0 = 1.1150e-04
Loss = 1.0484e-03, PNorm = 174.1074, GNorm = 0.0916, lr_0 = 1.1143e-04
Loss = 1.0831e-03, PNorm = 174.1103, GNorm = 0.0333, lr_0 = 1.1135e-04
Loss = 8.1686e-04, PNorm = 174.1129, GNorm = 0.1303, lr_0 = 1.1128e-04
Loss = 2.3307e-03, PNorm = 174.1160, GNorm = 0.2938, lr_0 = 1.1120e-04
Loss = 6.7372e-04, PNorm = 174.1174, GNorm = 0.0376, lr_0 = 1.1112e-04
Loss = 7.0983e-04, PNorm = 174.1199, GNorm = 0.1022, lr_0 = 1.1105e-04
Loss = 2.9800e-03, PNorm = 174.1229, GNorm = 0.1554, lr_0 = 1.1097e-04
Loss = 5.9995e-04, PNorm = 174.1240, GNorm = 0.0407, lr_0 = 1.1089e-04
Loss = 2.9856e-03, PNorm = 174.1252, GNorm = 0.0560, lr_0 = 1.1082e-04
Loss = 9.2307e-04, PNorm = 174.1273, GNorm = 0.0555, lr_0 = 1.1074e-04
Loss = 3.1488e-03, PNorm = 174.1294, GNorm = 0.1439, lr_0 = 1.1067e-04
Loss = 5.6222e-04, PNorm = 174.1307, GNorm = 0.0347, lr_0 = 1.1059e-04
Loss = 6.6971e-04, PNorm = 174.1306, GNorm = 0.0635, lr_0 = 1.1052e-04
Loss = 7.5988e-04, PNorm = 174.1312, GNorm = 0.0456, lr_0 = 1.1044e-04
Loss = 8.4380e-04, PNorm = 174.1329, GNorm = 0.0447, lr_0 = 1.1036e-04
Loss = 5.9028e-04, PNorm = 174.1350, GNorm = 0.0462, lr_0 = 1.1029e-04
Loss = 2.0284e-03, PNorm = 174.1362, GNorm = 0.1841, lr_0 = 1.1021e-04
Loss = 2.0921e-03, PNorm = 174.1394, GNorm = 0.1399, lr_0 = 1.1014e-04
Loss = 1.8049e-03, PNorm = 174.1426, GNorm = 0.0416, lr_0 = 1.1006e-04
Loss = 1.2463e-03, PNorm = 174.1457, GNorm = 0.1265, lr_0 = 1.0999e-04
Loss = 2.2402e-03, PNorm = 174.1470, GNorm = 0.1303, lr_0 = 1.0991e-04
Loss = 1.1566e-03, PNorm = 174.1467, GNorm = 0.0674, lr_0 = 1.0984e-04
Loss = 1.1201e-03, PNorm = 174.1488, GNorm = 0.1641, lr_0 = 1.0976e-04
Loss = 9.9089e-04, PNorm = 174.1504, GNorm = 0.1674, lr_0 = 1.0969e-04
Loss = 7.0506e-04, PNorm = 174.1507, GNorm = 0.0733, lr_0 = 1.0961e-04
Loss = 6.4966e-04, PNorm = 174.1510, GNorm = 0.0531, lr_0 = 1.0954e-04
Loss = 5.7065e-04, PNorm = 174.1522, GNorm = 0.0536, lr_0 = 1.0946e-04
Loss = 1.6688e-03, PNorm = 174.1546, GNorm = 0.1442, lr_0 = 1.0939e-04
Loss = 1.0225e-03, PNorm = 174.1563, GNorm = 0.0995, lr_0 = 1.0931e-04
Loss = 1.4825e-03, PNorm = 174.1583, GNorm = 0.0994, lr_0 = 1.0924e-04
Loss = 1.0189e-03, PNorm = 174.1598, GNorm = 0.0888, lr_0 = 1.0916e-04
Loss = 1.9079e-03, PNorm = 174.1606, GNorm = 0.1526, lr_0 = 1.0909e-04
Loss = 1.8116e-03, PNorm = 174.1615, GNorm = 0.0513, lr_0 = 1.0901e-04
Loss = 5.4764e-03, PNorm = 174.1622, GNorm = 0.1147, lr_0 = 1.0894e-04
Loss = 1.8170e-03, PNorm = 174.1635, GNorm = 0.0954, lr_0 = 1.0886e-04
Loss = 1.6266e-03, PNorm = 174.1652, GNorm = 0.0650, lr_0 = 1.0879e-04
Loss = 1.1482e-03, PNorm = 174.1652, GNorm = 0.0714, lr_0 = 1.0871e-04
Loss = 6.8461e-04, PNorm = 174.1663, GNorm = 0.0911, lr_0 = 1.0864e-04
Loss = 3.2230e-03, PNorm = 174.1679, GNorm = 0.0758, lr_0 = 1.0856e-04
Validation mae = 0.277354
Epoch 29
Loss = 1.1368e-03, PNorm = 174.1692, GNorm = 0.2174, lr_0 = 1.0849e-04
Loss = 1.8635e-03, PNorm = 174.1693, GNorm = 0.1240, lr_0 = 1.0841e-04
Loss = 7.1709e-04, PNorm = 174.1701, GNorm = 0.0752, lr_0 = 1.0834e-04
Loss = 7.2163e-04, PNorm = 174.1709, GNorm = 0.0792, lr_0 = 1.0827e-04
Loss = 6.9604e-04, PNorm = 174.1707, GNorm = 0.0301, lr_0 = 1.0819e-04
Loss = 2.2002e-03, PNorm = 174.1729, GNorm = 0.0735, lr_0 = 1.0812e-04
Loss = 1.4299e-03, PNorm = 174.1743, GNorm = 0.1247, lr_0 = 1.0804e-04
Loss = 1.4522e-03, PNorm = 174.1749, GNorm = 0.1036, lr_0 = 1.0797e-04
Loss = 5.5181e-04, PNorm = 174.1750, GNorm = 0.2174, lr_0 = 1.0790e-04
Loss = 6.9361e-04, PNorm = 174.1749, GNorm = 0.0831, lr_0 = 1.0782e-04
Loss = 6.1818e-04, PNorm = 174.1761, GNorm = 0.0788, lr_0 = 1.0775e-04
Loss = 6.6377e-04, PNorm = 174.1763, GNorm = 0.1289, lr_0 = 1.0767e-04
Loss = 5.0766e-04, PNorm = 174.1769, GNorm = 0.0788, lr_0 = 1.0760e-04
Loss = 2.7234e-03, PNorm = 174.1771, GNorm = 0.0803, lr_0 = 1.0753e-04
Loss = 6.7277e-04, PNorm = 174.1792, GNorm = 0.0377, lr_0 = 1.0745e-04
Loss = 6.1568e-04, PNorm = 174.1808, GNorm = 0.1321, lr_0 = 1.0738e-04
Loss = 8.5508e-04, PNorm = 174.1815, GNorm = 0.1013, lr_0 = 1.0731e-04
Loss = 6.7144e-04, PNorm = 174.1828, GNorm = 0.0354, lr_0 = 1.0723e-04
Loss = 2.7997e-03, PNorm = 174.1842, GNorm = 0.0363, lr_0 = 1.0716e-04
Loss = 5.7653e-04, PNorm = 174.1852, GNorm = 0.0867, lr_0 = 1.0709e-04
Loss = 1.3737e-03, PNorm = 174.1866, GNorm = 0.0947, lr_0 = 1.0701e-04
Loss = 2.0491e-03, PNorm = 174.1879, GNorm = 0.1052, lr_0 = 1.0694e-04
Loss = 1.2621e-03, PNorm = 174.1884, GNorm = 0.0566, lr_0 = 1.0687e-04
Loss = 9.3789e-04, PNorm = 174.1890, GNorm = 0.1014, lr_0 = 1.0679e-04
Loss = 9.7890e-04, PNorm = 174.1904, GNorm = 0.0350, lr_0 = 1.0672e-04
Loss = 6.2710e-04, PNorm = 174.1915, GNorm = 0.1432, lr_0 = 1.0665e-04
Loss = 2.8752e-03, PNorm = 174.1923, GNorm = 0.0336, lr_0 = 1.0657e-04
Loss = 6.5071e-04, PNorm = 174.1926, GNorm = 0.0582, lr_0 = 1.0650e-04
Loss = 1.0057e-03, PNorm = 174.1937, GNorm = 0.0649, lr_0 = 1.0643e-04
Loss = 5.3144e-04, PNorm = 174.1937, GNorm = 0.1074, lr_0 = 1.0635e-04
Loss = 5.3406e-04, PNorm = 174.1947, GNorm = 0.0766, lr_0 = 1.0628e-04
Loss = 5.6969e-04, PNorm = 174.1948, GNorm = 0.0524, lr_0 = 1.0621e-04
Loss = 1.0418e-03, PNorm = 174.1959, GNorm = 0.0946, lr_0 = 1.0614e-04
Loss = 2.3706e-03, PNorm = 174.1979, GNorm = 0.2017, lr_0 = 1.0606e-04
Loss = 9.1886e-04, PNorm = 174.1991, GNorm = 0.0742, lr_0 = 1.0599e-04
Loss = 7.1564e-04, PNorm = 174.2006, GNorm = 0.0317, lr_0 = 1.0592e-04
Loss = 8.5178e-04, PNorm = 174.2027, GNorm = 0.1208, lr_0 = 1.0585e-04
Loss = 9.1237e-04, PNorm = 174.2042, GNorm = 0.0720, lr_0 = 1.0577e-04
Loss = 9.6254e-04, PNorm = 174.2051, GNorm = 0.1217, lr_0 = 1.0570e-04
Loss = 5.7737e-04, PNorm = 174.2061, GNorm = 0.1147, lr_0 = 1.0563e-04
Loss = 5.2632e-04, PNorm = 174.2062, GNorm = 0.0657, lr_0 = 1.0556e-04
Loss = 2.2132e-03, PNorm = 174.2059, GNorm = 0.0924, lr_0 = 1.0548e-04
Loss = 5.0485e-04, PNorm = 174.2071, GNorm = 0.0923, lr_0 = 1.0541e-04
Loss = 6.9473e-04, PNorm = 174.2090, GNorm = 0.0578, lr_0 = 1.0534e-04
Loss = 5.3804e-04, PNorm = 174.2114, GNorm = 0.0830, lr_0 = 1.0527e-04
Loss = 8.2149e-04, PNorm = 174.2128, GNorm = 0.0491, lr_0 = 1.0519e-04
Loss = 2.7456e-03, PNorm = 174.2126, GNorm = 0.1070, lr_0 = 1.0512e-04
Loss = 8.8573e-04, PNorm = 174.2129, GNorm = 0.0433, lr_0 = 1.0505e-04
Loss = 1.3943e-03, PNorm = 174.2132, GNorm = 0.0904, lr_0 = 1.0498e-04
Loss = 7.0134e-04, PNorm = 174.2141, GNorm = 0.0440, lr_0 = 1.0491e-04
Loss = 1.3399e-03, PNorm = 174.2160, GNorm = 0.0954, lr_0 = 1.0483e-04
Loss = 6.3314e-04, PNorm = 174.2172, GNorm = 0.0482, lr_0 = 1.0476e-04
Loss = 5.4806e-04, PNorm = 174.2197, GNorm = 0.0767, lr_0 = 1.0469e-04
Loss = 8.4368e-04, PNorm = 174.2209, GNorm = 0.1130, lr_0 = 1.0462e-04
Loss = 4.5293e-04, PNorm = 174.2220, GNorm = 0.0523, lr_0 = 1.0455e-04
Loss = 5.2028e-04, PNorm = 174.2227, GNorm = 0.0877, lr_0 = 1.0448e-04
Loss = 5.7570e-04, PNorm = 174.2250, GNorm = 0.0670, lr_0 = 1.0440e-04
Loss = 3.0959e-03, PNorm = 174.2275, GNorm = 0.1631, lr_0 = 1.0433e-04
Loss = 1.5389e-03, PNorm = 174.2295, GNorm = 0.1491, lr_0 = 1.0426e-04
Loss = 2.0962e-03, PNorm = 174.2303, GNorm = 0.0609, lr_0 = 1.0419e-04
Loss = 1.6061e-03, PNorm = 174.2295, GNorm = 0.0689, lr_0 = 1.0412e-04
Loss = 1.6360e-03, PNorm = 174.2297, GNorm = 0.0423, lr_0 = 1.0405e-04
Loss = 8.1613e-04, PNorm = 174.2308, GNorm = 0.0363, lr_0 = 1.0398e-04
Loss = 1.3943e-03, PNorm = 174.2314, GNorm = 0.0372, lr_0 = 1.0391e-04
Loss = 9.8853e-04, PNorm = 174.2321, GNorm = 0.0617, lr_0 = 1.0383e-04
Loss = 1.3852e-03, PNorm = 174.2327, GNorm = 0.1083, lr_0 = 1.0376e-04
Loss = 1.0160e-03, PNorm = 174.2331, GNorm = 0.0898, lr_0 = 1.0369e-04
Loss = 4.2267e-03, PNorm = 174.2332, GNorm = 0.0613, lr_0 = 1.0362e-04
Loss = 2.0436e-03, PNorm = 174.2342, GNorm = 0.0941, lr_0 = 1.0355e-04
Loss = 1.7365e-03, PNorm = 174.2358, GNorm = 0.0879, lr_0 = 1.0348e-04
Loss = 9.2958e-04, PNorm = 174.2372, GNorm = 0.0807, lr_0 = 1.0341e-04
Loss = 5.5401e-04, PNorm = 174.2391, GNorm = 0.0341, lr_0 = 1.0334e-04
Loss = 7.6171e-04, PNorm = 174.2402, GNorm = 0.0419, lr_0 = 1.0327e-04
Loss = 1.4043e-03, PNorm = 174.2410, GNorm = 0.1501, lr_0 = 1.0320e-04
Loss = 7.7527e-04, PNorm = 174.2425, GNorm = 0.0957, lr_0 = 1.0312e-04
Loss = 1.9010e-03, PNorm = 174.2429, GNorm = 0.1769, lr_0 = 1.0305e-04
Loss = 1.3666e-03, PNorm = 174.2435, GNorm = 0.1233, lr_0 = 1.0298e-04
Loss = 4.9922e-04, PNorm = 174.2455, GNorm = 0.0515, lr_0 = 1.0291e-04
Loss = 1.2765e-03, PNorm = 174.2464, GNorm = 0.0508, lr_0 = 1.0284e-04
Loss = 2.1197e-03, PNorm = 174.2475, GNorm = 0.0377, lr_0 = 1.0277e-04
Loss = 1.2278e-03, PNorm = 174.2482, GNorm = 0.0902, lr_0 = 1.0270e-04
Loss = 1.4946e-03, PNorm = 174.2498, GNorm = 0.0392, lr_0 = 1.0263e-04
Loss = 2.3760e-03, PNorm = 174.2516, GNorm = 0.2106, lr_0 = 1.0256e-04
Loss = 3.5679e-03, PNorm = 174.2530, GNorm = 0.0746, lr_0 = 1.0249e-04
Loss = 8.4410e-04, PNorm = 174.2555, GNorm = 0.0890, lr_0 = 1.0242e-04
Loss = 1.6909e-03, PNorm = 174.2574, GNorm = 0.0726, lr_0 = 1.0235e-04
Loss = 1.2999e-03, PNorm = 174.2584, GNorm = 0.0867, lr_0 = 1.0228e-04
Loss = 6.1020e-04, PNorm = 174.2593, GNorm = 0.0864, lr_0 = 1.0221e-04
Loss = 9.5058e-04, PNorm = 174.2609, GNorm = 0.0303, lr_0 = 1.0214e-04
Loss = 8.3542e-04, PNorm = 174.2630, GNorm = 0.1434, lr_0 = 1.0207e-04
Loss = 2.1118e-03, PNorm = 174.2647, GNorm = 0.6015, lr_0 = 1.0200e-04
Loss = 7.3845e-04, PNorm = 174.2672, GNorm = 0.1998, lr_0 = 1.0193e-04
Loss = 4.2925e-04, PNorm = 174.2686, GNorm = 0.1007, lr_0 = 1.0186e-04
Loss = 6.3440e-04, PNorm = 174.2695, GNorm = 0.2058, lr_0 = 1.0179e-04
Loss = 2.5297e-03, PNorm = 174.2705, GNorm = 0.1606, lr_0 = 1.0172e-04
Loss = 1.0220e-03, PNorm = 174.2724, GNorm = 0.0277, lr_0 = 1.0165e-04
Loss = 1.2766e-03, PNorm = 174.2737, GNorm = 0.1025, lr_0 = 1.0158e-04
Loss = 5.0268e-04, PNorm = 174.2746, GNorm = 0.0444, lr_0 = 1.0151e-04
Loss = 2.3278e-03, PNorm = 174.2750, GNorm = 0.0210, lr_0 = 1.0144e-04
Loss = 6.3143e-04, PNorm = 174.2765, GNorm = 0.0342, lr_0 = 1.0137e-04
Loss = 1.3578e-03, PNorm = 174.2776, GNorm = 0.0537, lr_0 = 1.0130e-04
Loss = 1.5898e-03, PNorm = 174.2788, GNorm = 0.0821, lr_0 = 1.0123e-04
Loss = 2.4884e-03, PNorm = 174.2790, GNorm = 0.0694, lr_0 = 1.0116e-04
Loss = 5.8437e-04, PNorm = 174.2807, GNorm = 0.0271, lr_0 = 1.0110e-04
Loss = 1.2611e-03, PNorm = 174.2821, GNorm = 0.0924, lr_0 = 1.0103e-04
Loss = 1.0655e-03, PNorm = 174.2844, GNorm = 0.0681, lr_0 = 1.0096e-04
Loss = 8.0603e-04, PNorm = 174.2843, GNorm = 0.0410, lr_0 = 1.0089e-04
Loss = 5.0508e-04, PNorm = 174.2846, GNorm = 0.0415, lr_0 = 1.0082e-04
Loss = 9.5950e-04, PNorm = 174.2858, GNorm = 0.0650, lr_0 = 1.0075e-04
Loss = 1.6228e-03, PNorm = 174.2863, GNorm = 0.0459, lr_0 = 1.0068e-04
Loss = 6.4476e-04, PNorm = 174.2879, GNorm = 0.0498, lr_0 = 1.0061e-04
Loss = 6.8677e-04, PNorm = 174.2893, GNorm = 0.0692, lr_0 = 1.0054e-04
Loss = 1.1408e-03, PNorm = 174.2909, GNorm = 0.2115, lr_0 = 1.0047e-04
Loss = 9.5597e-04, PNorm = 174.2908, GNorm = 0.2446, lr_0 = 1.0041e-04
Loss = 7.3171e-04, PNorm = 174.2920, GNorm = 0.0894, lr_0 = 1.0034e-04
Loss = 8.3002e-04, PNorm = 174.2932, GNorm = 0.1160, lr_0 = 1.0027e-04
Loss = 2.5947e-03, PNorm = 174.2939, GNorm = 0.0601, lr_0 = 1.0020e-04
Loss = 1.0233e-03, PNorm = 174.2955, GNorm = 0.1395, lr_0 = 1.0013e-04
Loss = 1.4446e-03, PNorm = 174.2969, GNorm = 0.0460, lr_0 = 1.0006e-04
Loss = 1.2960e-03, PNorm = 174.2978, GNorm = 0.0354, lr_0 = 1.0000e-04
Validation mae = 0.277462
Model 0 best validation mae = 0.277354 on epoch 28
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.275528
Ensemble test mae = 0.275528
10-fold cross validation
	Seed 0 ==> test mae = 0.273259
	Seed 1 ==> test mae = 0.273786
	Seed 2 ==> test mae = 0.273545
	Seed 3 ==> test mae = 0.273563
	Seed 4 ==> test mae = 0.274277
	Seed 5 ==> test mae = 0.273797
	Seed 6 ==> test mae = 0.272474
	Seed 7 ==> test mae = 0.274740
	Seed 8 ==> test mae = 0.274369
	Seed 9 ==> test mae = 0.275528
Overall test mae = 0.273934 +/- 0.000801
Elapsed time = 4:42:59
